diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/.gitignore b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..306db4fda1fea41ff79900b4aaf55ccd2b222e1b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/.gitignore @@ -0,0 +1,121 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +data +.vscode +.idea + +# custom +*.pkl +*.pkl.json +*.log.json +work_dirs/ +work_dirs +pretrained +pretrained/ +# Pytorch +*.pth +trash/ +trash diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/LICENSE b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..c14f578420e1f6ff6f60ca714839d0ed329a1051 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/LICENSE @@ -0,0 +1,64 @@ +NVIDIA Source Code License for SegFormer + +1. Definitions + +“Licensor” means any person or entity that distributes its Work. + +“Software” means the original work of authorship made available under this License. + +“Work” means the Software and any additions to or derivative works of the Software that are made available under +this License. + +The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under +U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include +works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. + +Works, including the Software, are “made available” under this License by including in or with the Work either +(a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. + +2. License Grant + +2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, +worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly +display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. + +3. Limitations + +3.1 Redistribution. 
You may reproduce or distribute the Work only if (a) you do so under this License, (b) you
+include a complete copy of this License with your distribution, and (c) you retain without modification any
+copyright, patent, trademark, or attribution notices that are present in the Work.
+
+3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and
+distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use
+limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works
+that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution
+requirements in Section 3.1) will continue to apply to the Work itself.
+
+3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use
+non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative
+works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
+
+3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim,
+cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then
+your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.
+
+3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos,
+or trademarks, except as necessary to reproduce the notices described in this License.
+
+3.6 Termination. If you violate any term of this License, then your rights under this License (including the
+grant in Section 2.1) will terminate immediately.
+
+4. Disclaimer of Warranty.
+
+THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU
+BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
+
+5. Limitation of Liability.
+
+EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING
+NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
+INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR
+INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR
+DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..27f8edffb2609e961d8cdf00c5f75443029e2801
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/README.md
@@ -0,0 +1,65 @@
+# SegFormer
+
+- Reference implementation:
+```
+url=https://github.com/NVlabs/SegFormer
+```
+
+# Requirements
+
+- Install the requirements:
+
+  ```
+  pip install docutils myst-parser sphinx sphinx_copybutton sphinx_markdown_tables
+  pip install -e git+https://github.com/gaotongxiao/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
+  pip install cityscapesscripts
+  pip install matplotlib mmcls numpy packaging prettytable
+  pip install codecov flake8 interrogate pytest xdoctest yapf
+  ```
+
+  Install the modified `mmcv-1.2.7` library:
+
+  ```shell
+  # Uninstall any previously installed mmcv-1.2.7
+  pip3 uninstall mmcv-1.2.7
+
+  # Install the modified mmcv-1.2.7
+  cd mmcv-1.2.7
+  python3 setup.py install
+  ```
+  torch and apex must be pinned to the ascend20220315 release; later versions fail with SyncBN errors.
+  In addition, create a `pretrained` directory in the project root and place the file mit_b0.pth in it. The file can be obtained from:
+  obs://ascend-pytorch-model-file/验收-训练/cv/semantic_segmentation/segformer/mit_b0.pth
+
+# Accuracy and Performance
+
+| Name | Accuracy | Performance |
+| :----: | :---: | :--: |
+| GPU-1p | - | 8.75 |
+| GPU-8p | 76.91 | 50.16 |
+| NPU-1p | - | 8.82 |
+| NPU-8p | 76.40 | 54.48 |
+
+# Self-validation Report
+```shell
+# 1p train perf
+# Check that the performance log file is produced correctly
+bash test/train_performance_1p.sh --data_path=real_data_path
+# Result: OK
+
+# 8p train perf
+# Check that the performance log file is produced correctly
+bash test/train_performance_8p.sh --data_path=real_data_path
+# Result: OK
+
+# 1p train full
+# Check that the performance/accuracy log files are produced and the model checkpoint is saved correctly
+bash test/train_full_1p.sh --data_path=real_data_path
+# Result: OK
+
+# 8p train full
+# Check that the performance/accuracy log files are produced and the model checkpoint is saved correctly
+bash test/train_full_8p.sh --data_path=real_data_path
+# Result: OK
+
+```
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/bind_pyt.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/bind_pyt.py
new file mode 100644
index 0000000000000000000000000000000000000000..d05a5a495594f5b729f4ae371b7260cbb273ae0c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/bind_pyt.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
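+
+# This launcher mirrors torch.distributed.launch: it spawns one training
+# process per device, exports the standard distributed environment variables
+# (MASTER_ADDR, MASTER_PORT, WORLD_SIZE, NODE_RANK, RANK, LOCAL_RANK), and
+# wraps each process in numactl, pinning it to a dedicated CPU core range
+# (and, unless --no_membind is passed, to its local memory node).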
+
+import sys
+import subprocess
+import os
+import socket
+from argparse import ArgumentParser, REMAINDER
+
+import torch
+
+
+def parse_args():
+    """
+    Helper function parsing the command line options
+    @retval ArgumentParser
+    """
+    parser = ArgumentParser(description="PyTorch distributed training launch "
+                                        "helper utility that spawns "
+                                        "multiple distributed processes")
+
+    # Optional arguments for the launch helper
+    parser.add_argument("--nnodes", type=int, default=1,
+                        help="The number of nodes to use for distributed "
+                             "training")
+    parser.add_argument("--node_rank", type=int, default=0,
+                        help="The rank of the node for multi-node distributed "
+                             "training")
+    parser.add_argument("--nproc_per_node", type=int, default=8,
+                        help="The number of processes to launch on each node, "
+                             "for GPU training, this is recommended to be set "
+                             "to the number of GPUs in your system so that "
+                             "each process can be bound to a single GPU.")
+    parser.add_argument("--master_addr", default="127.0.0.1", type=str,
+                        help="Master node (rank 0)'s address, should be either "
+                             "the IP address or the hostname of node 0, for "
+                             "single node multi-proc training, the "
+                             "--master_addr can simply be 127.0.0.1")
+    parser.add_argument("--master_port", default=29688, type=int,
+                        help="Master node (rank 0)'s free port that needs to "
+                             "be used for communication during distributed "
+                             "training")
+    parser.add_argument('--no_hyperthreads', action='store_true',
+                        help='Flag to disable binding to hyperthreads')
+    parser.add_argument('--no_membind', action='store_true',
+                        help='Flag to disable memory binding')
+
+    # non-optional arguments for binding
+    parser.add_argument("--nsockets_per_node", type=int, required=True,
+                        help="Number of CPU sockets on a node")
+    parser.add_argument("--ncores_per_socket", type=int, required=True,
+                        help="Number of CPU cores per socket")
+
+    # positional
+    parser.add_argument("training_script", type=str,
+                        help="The full path to the single GPU training "
+                             "program/script to be launched in parallel, "
+                             "followed by all the arguments for the "
+                             "training script")
+
+    # rest from the training program
+    parser.add_argument("--data_path", type=str, default='')
+    parser.add_argument('training_script_args', nargs=REMAINDER)
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+
+    # variables for numactl binding
+
+    NSOCKETS = args.nsockets_per_node
+    NGPUS_PER_SOCKET = (args.nproc_per_node // args.nsockets_per_node) + (
+        1 if (args.nproc_per_node % args.nsockets_per_node) else 0)
+    NCORES_PER_GPU = args.ncores_per_socket // NGPUS_PER_SOCKET
+
+    # world size in terms of number of processes
+    dist_world_size = args.nproc_per_node * args.nnodes
+
+    # set PyTorch distributed related environmental variables
+    current_env = os.environ.copy()
+    current_env["MASTER_ADDR"] = args.master_addr
+    current_env["MASTER_PORT"] = str(args.master_port)
+    current_env["WORLD_SIZE"] = str(dist_world_size)
+    current_env['NODE_RANK'] = str(args.node_rank)
+
+    processes = []
+
+    for local_rank in range(0, args.nproc_per_node):
+        # each process's rank
+        dist_rank = args.nproc_per_node * args.node_rank + local_rank
+        current_env["RANK"] = str(dist_rank)
+        current_env['LOCAL_RANK'] = str(local_rank)
+
+        # form numactl binding command
+        cpu_ranges = [local_rank * NCORES_PER_GPU,
+                      (local_rank + 1) * NCORES_PER_GPU - 1,
+                      local_rank * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS),
+                      (local_rank + 1) * NCORES_PER_GPU + (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS) - 1]
+
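+        # cpu_ranges holds two inclusive core ranges for this rank: entries
+        # [0], [1] span its physical cores, and entries [2], [3] are the same
+        # range offset by the node's total physical core count
+        # (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS), which is where Linux
+        # typically numbers the hyperthread siblings; --no_hyperthreads keeps
+        # only the first range.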
numactlargs = [] + if args.no_hyperthreads: + numactlargs += ["--physcpubind={}-{}".format(*cpu_ranges[0:2])] + else: + numactlargs += ["--physcpubind={}-{},{}-{}".format(*cpu_ranges)] + + if not args.no_membind: + memnode = local_rank // NGPUS_PER_SOCKET + numactlargs += ["--membind={}".format(memnode)] + + # spawn the processes + cmd = ["/usr/bin/numactl"] \ + + numactlargs \ + + [sys.executable, + "-u", + args.training_script, + "--local_rank={}".format(local_rank) + ] \ + + args.training_script_args + + process = subprocess.Popen(cmd, env=current_env) + processes.append(process) + + for process in processes: + process.wait() + + +if __name__ == "__main__": + main() + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..efc8b4bb20c981f3db6df7eb52b3dc0744c94cc0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/chase_db1.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..298594ea925f87f22b37094a2ec50e370aec96a0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + 
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f21867c63e1835f6fceb61f066e802fd8fd2a735 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_768x768.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_768x768.py new file mode 100644 index 
0000000000000000000000000000000000000000..fde9d7c7d8076dabff081fce0989eec6a6f5ff07 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_768x768.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_769x769.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..336c7b254fe392b4703039fec86a83acdbd2e1a5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/drive.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..06e8ff606e0d2a4514ec8b7d2c6c436a32efcbf4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 
103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/hrf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..242d790eb1b83e75cf6b7eaa7a35c674099311ad --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + 
img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..ff65bad1b86d7e3a5980bb5b9fc55798dc8df5f4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1d42d0c5781f56dc177d860d856bb34adce555 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', 
+ img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12_aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3f23b6717d53ad29f02dd15046802a2631a5076b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/stare.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f71b25488cc11a6b4d582ac52b5a24e1ad1cf8e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + 
data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/default_runtime.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..b564cc4e7e7d9a67dacaaddecb100e4d8f5c005b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ann_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb653827e44e6015b3b83bc578003e614a6aa1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/apcnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f5316cbcf3896ba9de7ca2c801eba512f01d5e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + 
in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ccnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..794148f576b9e215c3c6963e73dffe98204b7717 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/cgnet.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..eff8d9458c877c5db894957e0b1b4597e40da6ab --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/danet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..2c934939fac48525f22ad86f489a041dd7db7d09 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model 
settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a43bee01422ad4795dd27874e0cd4bb6cbfecf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd262999d8b2cb8e14a5c32190ae73f479d8e81 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( 
+ type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3plus_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..050e39e091d816df9028d23aa3ecf9db74e441e1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dmnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d22ba52640bebd805b3b8d07025e276dfb023759 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + 
align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dnl_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..edb4c174c51e34c103737ba39bfc48bf831e561d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/emanet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..26adcd430926de0862204a71d345f2543167f27b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/encnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..be777123a886503172a95fe0719e956a147bbd68 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fast_scnn.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..32fdeb659355a5ce5ef2cc7c2f30742703811cdf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_hr18.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c3e299bc89ada56ca14bbffcbdb08a586b8ed9e9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = 
dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5e98f6cc918b6146fc6d613c6918e825ef1355c3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..a33e7972877f902d0e7d18401ca675e3e4e60a18 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + 
norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fpn_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..86ab327db92e44c14822d65f1c9277cb007f17c1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/gcnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2ad69f5c22adfe79d5fdabf920217628987166 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model 
training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/lraspp_m-v3-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..93258242a90695cc94a7c6bd41562d6a75988771 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/nonlocal_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5674a39854cafd1f2e363bac99c58ccae62f24da --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_hr18.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c60f62a7cdf3f5c5096a7a7e725e8268fddcb057 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + 
block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..615aa3ff703942b6c22b2d6e9642504dd3e41ebd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pointrend_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..9d323dbf9466d41e0800aa57ef84045f3d874bdf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + 
decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/psanet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..689513fa9d2a40f14bf0ae4ae61f38f0dcc1b3da --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f451e08ad2eb0732dcb806b1851eb978d4acf136 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', 
+ in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..fcff9ec4f41fad158344ecd77313dc14564f3682 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/upernet_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..10974962fdd7136031fd06de1700f497d355ceaa --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..52603890b10f25faf8eec9f9e5a4468fae09b811 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bf780a1b6f6521833c6a5859675147824efa599d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_40k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..cdbf841abcb26eed87bf76ab816aff4bae0630ee --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_80k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..c190cee6bdc7922b688ea75dc8f152fa15c24617 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/README.md new file mode 100644 index 
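The four schedule files above share the same SGD optimizer and `poly` learning-rate policy; they differ only in `max_iters` and the checkpoint/evaluation interval. As an illustration of the per-iteration decay (a sketch of the formula, not mmcv's `PolyLrUpdaterHook` itself):

```python
# Sketch of the per-iteration 'poly' decay (by_epoch=False).
def poly_lr(base_lr, min_lr, power, cur_iter, max_iters):
    coeff = (1 - cur_iter / max_iters) ** power
    return (base_lr - min_lr) * coeff + min_lr

# schedule_160k.py values: lr=0.01, min_lr=1e-4, power=0.9
print(poly_lr(0.01, 1e-4, 0.9, 0, 160000))       # 0.01 at iter 0
print(poly_lr(0.01, 1e-4, 0.9, 80000, 160000))   # ~0.0054 at the midpoint
print(poly_lr(0.01, 1e-4, 0.9, 160000, 160000))  # floors at min_lr=1e-4
```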
0000000000000000000000000000000000000000..7fc1648311d8f6789fd2ed99789973afbb940531 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/README.md @@ -0,0 +1,52 @@ +# Asymmetric Non-local Neural Networks for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{annn, + author = {Zhen Zhu and + Mengde Xu and + Song Bai and + Tengteng Huang and + Xiang Bai}, + title = {Asymmetric Non-local Neural Networks for Semantic Segmentation}, + booktitle={International Conference on Computer Vision}, + year = {2019}, + url = {http://arxiv.org/abs/1908.07678}, +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ANN | R-50-D8 | 512x1024 | 40000 | 6 | 3.71 | 77.40 | 78.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json) | +| ANN | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.55 | 76.55 | 78.85 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json) | +| ANN | R-50-D8 | 769x769 | 40000 | 6.8 | 1.70 | 78.89 | 80.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json) | +| ANN | R-101-D8 | 769x769 | 40000 | 10.7 | 1.15 | 79.32 | 80.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json) | +| ANN | R-50-D8 | 512x1024 | 80000 | - | - | 77.34 | 78.65 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json) | +| ANN | R-101-D8 | 512x1024 | 80000 | - | - | 77.14 | 78.81 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json) | +| ANN | R-50-D8 | 769x769 | 80000 | - | - | 78.88 | 80.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json) | +| ANN | R-101-D8 | 769x769 | 80000 | - | - | 78.80 | 80.34 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ANN | R-50-D8 | 512x512 | 80000 | 9.1 | 21.01 | 41.01 | 42.30 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-101-D8 | 512x512 | 80000 | 12.5 | 14.12 | 42.94 | 44.18 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-50-D8 | 512x512 | 160000 | - | - | 41.74 | 42.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json) | +| ANN | R-101-D8 | 512x512 | 160000 | - | - | 42.94 | 44.06 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ANN | R-50-D8 | 512x512 | 20000 | 6 | 20.92 | 74.86 | 76.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-101-D8 | 512x512 | 20000 | 9.5 | 13.94 | 77.47 | 78.70 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-50-D8 | 512x512 | 40000 | - | - | 76.56 | 77.51 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json) | +| ANN | R-101-D8 | 512x512 | 40000 | - | - | 76.70 | 78.06 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d494e07333217e0c6830d36d1bb58fa78b03cfb0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1eeff0b030cf1db8c6ec9740fa65db44b2026d58 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9e43af541f6e3df3f36479e736bb0c03fc916970 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d854f2e4223731f443369febc500dbccdc524d9d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..893c53b1ca4bf9788e4d94f0f53cfe92a93f48ce --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a64dac670ed4d4632e7b9791ec5f8a334dcea78e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..59508248490b3edbac1c46b4fcc7891f99655b9b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a9c712d1ccfd62ddf6f12ff01ea347ca1995013b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..00b2594ba8a1c9edc90cca7a6d7c3334fa209edc --- /dev/null +++ 
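Each R-101 variant above is a two-line config: it inherits everything from the corresponding R-50 file via `_base_` and overrides only the pretrained weights and the backbone depth. Conceptually the config loader performs a recursive dict merge; a minimal sketch of that behaviour (a simplification of mmcv.Config's merge, which also handles base-file lists and deletion keys):

```python
# Minimal sketch of how `_base_` overrides compose (illustrative only).
def merge_cfg(base, override):
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_cfg(merged[key], value)   # merge nested dicts
        else:
            merged[key] = value                           # replace outright
    return merged

base = dict(pretrained='open-mmlab://resnet50_v1c',
            backbone=dict(type='ResNetV1c', depth=50, num_stages=4))
override = dict(pretrained='open-mmlab://resnet101_v1c',
                backbone=dict(depth=101))
print(merge_cfg(base, override))
# backbone keeps type and num_stages; only depth and pretrained change
```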
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7b369dd9e12b2282a30da14f99dd4547c53a7b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6bb248ac867d463c274f975c884aa80a57730f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..071f190261c4e8f4a80a5da12a88e0cfcdfef0d8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..82a1c9386c51fb0ada436e51702beb961a534b26 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5e04aa7c6ac050d119e07b715e2082f692e1a1de --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4912bdb9fb298518ae084eb7df0ad22d3e4ff84f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d1cc072b152986102286f503e3d7b92999bf414c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c2ab106a29c1a135fc7a726df9f6f22536357ced --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/README.md @@ -0,0 +1,39 @@ +# Adaptive Pyramid Context Network for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@InProceedings{He_2019_CVPR, +author = {He, Junjun and Deng, Zhongying and Zhou, Lei and Wang, Yali and Qiao, Yu}, +title = {Adaptive Pyramid Context Network for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| APCNet | R-50-D8 | 512x1024 | 40000 | 7.7 | 3.57 | 78.02 | 79.26 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) | 
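The 769x769 variants above evaluate with `test_cfg=dict(mode='slide', ...)`: inference runs on overlapping 769x769 crops placed every 513 pixels, and the overlapping predictions are averaged. A rough sketch of the crop grid (simplified from mmseg's slide inference, which also sums overlapping logits and normalises by a count map; assumes the image is at least crop-sized):

```python
# Sketch of the window grid behind test_cfg mode='slide' (illustrative only).
def slide_windows(h, w, crop=769, stride=513):
    ys = list(range(0, h - crop + 1, stride))
    xs = list(range(0, w - crop + 1, stride))
    if ys[-1] + crop < h:
        ys.append(h - crop)        # last row of windows touches the border
    if xs[-1] + crop < w:
        xs.append(w - crop)
    return [(y, x, y + crop, x + crop) for y in ys for x in xs]

print(len(slide_windows(1024, 2048)))  # 8 windows per 1024x2048 image
```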
[log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 512x1024 | 40000 | 11.2 | 2.15 | 79.08 | 80.34 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| APCNet | R-50-D8 | 769x769 | 40000 | 8.7 | 1.52 | 77.89 | 79.75 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 769x769 | 40000 | 12.7 | 1.03 | 77.96 | 79.24 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| APCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.96 | 79.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| APCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.61 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| APCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.79 | 80.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| APCNet | R-101-D8 | 769x769 | 80000 | - | - | 78.45 | 79.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| APCNet | R-50-D8 | 512x512 | 80000 | 10.1 | 19.61 | 42.20 | 43.30 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| APCNet | R-101-D8 | 512x512 | 80000 | 13.6 | 13.10 | 45.54 | 46.65 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| APCNet | R-50-D8 | 512x512 | 160000 | - | - | 43.40 | 43.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| APCNet | R-101-D8 | 512x512 | 160000 | - | - | 45.41 | 46.63 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1e1cec67355abae33d518417eb96eae111f16d2b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..04cb006ba146268e1d3278151bc6ea00a4fb1bfe --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 
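Any config/checkpoint pair linked in these tables can be loaded for inference. A minimal sketch, assuming this tree keeps the upstream mmsegmentation 0.x Python API (`init_segmentor`/`inference_segmentor`) and that the paths below, which are hypothetical stand-ins, point at locally downloaded files:

```python
# Hypothetical local paths; substitute any pair from the tables above.
from mmseg.apis import init_segmentor, inference_segmentor

config = 'configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py'
checkpoint = 'apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth'

model = init_segmentor(config, checkpoint, device='cuda:0')
result = inference_segmentor(model, 'demo.png')  # list of per-pixel label maps
print(result[0].shape)
```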
0000000000000000000000000000000000000000..1ce2279a0fbfd6fcc7cd20e3f552b1a39f47d943 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..8f10b98406c88256c66d3bbe241c149791d68feb --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5c44ebcaf36075e67208c5f033d1e5f9a78dda4e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..616984575dda73a13fc5870f60ae6ffa30d6b01b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..99c61a942e4868315ce4a9404d113f73fed4a4ea --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..62a0627ae2e9bb17974068e56ee660093e944e0d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f7821c559d2f92d23b28e07e040a54cfc425eefc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..daafa5fbc12c3ed6c10b5234d520166f774e0f94 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3db6140cb97da1d202fd464d01f793276effa629 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9cac4254f37bc3755bff869a10eb3cb75db4d943 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..044d5896781de5824fc5a009d8c0eadf47a44e4e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/README.md @@ -0,0 +1,47 @@ +# CCNet: Criss-Cross Attention for Semantic Segmentation + +## Introduction + +[ALGORITHM] + 
+```latex +@article{huang2018ccnet, + title={CCNet: Criss-Cross Attention for Semantic Segmentation}, + author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu}, + booktitle={ICCV}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| CCNet | R-50-D8 | 512x1024 | 40000 | 6 | 3.32 | 77.76 | 78.87 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json) | +| CCNet | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.31 | 76.35 | 78.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json) | +| CCNet | R-50-D8 | 769x769 | 40000 | 6.8 | 1.43 | 78.46 | 79.93 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json) | +| CCNet | R-101-D8 | 769x769 | 40000 | 10.7 | 1.01 | 76.94 | 78.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json) | +| CCNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.03 | 80.16 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.87 | 79.90 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json) | +| CCNet | R-50-D8 | 769x769 | 80000 | - | - | 79.29 | 81.08 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.45 | 80.66 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| CCNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.89 | 41.78 | 42.98 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-101-D8 | 512x512 | 80000 | 12.2 | 14.11 | 43.97 | 45.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.08 | 43.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json) | +| CCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.71 | 45.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| CCNet | R-50-D8 | 512x512 | 20000 | 6 | 20.45 | 76.17 | 77.51 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-101-D8 | 512x512 | 20000 | 9.5 | 13.64 | 77.27 | 79.02 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-50-D8 | 512x512 | 40000 | - | - | 75.96 | 77.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json) | +| CCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.87 | 78.90 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d2bac38ca6760af6441ede5a04409ed495ef87f3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..989928ab7f98da86e291451040ff85669a9fbddb --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..c32bf48751f0a18983bff0d99310870b71801663 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..53eb77c0cd6690668ee7c2a666bd85b9a5f7e73b --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d7eb668f39bbd22a1f42628428bc19d1645e9865 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..029c1d525b809b61dc8e548ebe4fb26e5c68a8be --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..43f05fab05ee4e20c3509a923118fe9818543cbd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..654f377b6f6152c9bd98d33824a39a41d7510c3f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6a4316dde57206fe369e72fa0d32a529fe1a1932 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 
index 0000000000000000000000000000000000000000..16e34356e9f8566ec73e3c25c771e281d3eeb975 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1ad94d8988bb822c1571816255464126d9d5b95d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..bbcd29ccea8dcf9f67f1cd198dacd5dab380b265 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..947b8ac8ce1ddf7906ad39788c6992df3b506d29 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1a1f49cf6b112afdadf1841571f51b98c010ddf8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..580d59ca6995ea95a9345ef3ea574ea5b57e9cfb
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/ccnet_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6dac64377bb3f73fdf5c836fa9c38757f75ff76
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/ccnet_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..00ba387203a257dbb485d68134c88cb43780722d
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/README.md
@@ -0,0 +1,23 @@
+# CGNet: A Light-weight Context Guided Network for Semantic Segmentation
+
+## Introduction
+
+[ALGORITHM]
+
+```latex
+@article{wu2018cgnet,
+  title={CGNet: A Light-weight Context Guided Network for Semantic Segmentation},
+  author={Wu, Tianyi and Tang, Sheng and Zhang, Rui and Zhang, Yongdong},
+  journal={arXiv preprint arXiv:1811.08201},
+  year={2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------|
+| CGNet | M3N21 | 680x680 | 60000 | 7.5 | 30.51 | 65.63 | 68.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes-20201101_110253.log.json) |
+| CGNet | M3N21 | 512x1024 | 60000 | 8.3 | 31.14 | 68.27 | 70.33 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes-20201101_110254.log.json) |
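The two CGNet configs that follow override the shared `_base_` schedules: they train with Adam and decay the learning rate with a 'poly' policy (`lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)`). As a rough illustration of what that policy computes (a sketch mirroring the config values, not code from this repository):

```python
# Sketch of the 'poly' learning-rate decay used by the CGNet configs below.
# base_lr, power and min_lr mirror the config values; illustrative only.

def poly_lr(cur_iter: int,
            max_iters: int = 60000,
            base_lr: float = 0.001,
            power: float = 0.9,
            min_lr: float = 1e-4) -> float:
    """Decay base_lr polynomially towards min_lr over max_iters iterations."""
    coeff = (1 - cur_iter / max_iters) ** power
    return (base_lr - min_lr) * coeff + min_lr

print(poly_lr(0))       # 0.001 at the start of training
print(poly_lr(30000))   # ~0.00058 halfway through
print(poly_lr(60000))   # 0.0001 (the configured floor) at the end
```

diff --git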
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_512x1024_60k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..11421ef9d375d01b01c333c3705d6eb6e3348ee8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_512x1024_60k_cityscapes.py @@ -0,0 +1,66 @@ +_base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py'] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=8, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_680x680_60k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2f8eefb7dbecf81fcd2db54644493480825246 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/cgnet/cgnet_680x680_60k_cityscapes.py @@ -0,0 +1,50 @@ +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (680, 680) +train_pipeline = [ + 
dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=8,
+    workers_per_gpu=8,
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f49ccf96194f820f509aa9448493ba12ead91953
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/README.md
@@ -0,0 +1,47 @@
+# Dual Attention Network for Scene Segmentation
+
+## Introduction
+
+[ALGORITHM]
+
+```latex
+@inproceedings{fu2018dual,
+  title={Dual Attention Network for Scene Segmentation},
+  author={Fu, Jun and Liu, Jing and Tian, Haijie and Li, Yong and Bao, Yongjun and Fang, Zhiwei and Lu, Hanqing},
+  booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------|
+| DANet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.66 | 78.74 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json) |
+| DANet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.99 | 80.52 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json) |
+| DANet | R-50-D8 | 769x769 | 40000 | 8.8 | 1.56 | 78.88 | 80.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) |
[log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json) | +| DANet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.07 | 79.88 | 81.47 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json) | +| DANet | R-50-D8 | 512x1024 | 80000 | - | - | 79.34 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json) | +| DANet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json) | +| DANet | R-50-D8 | 769x769 | 80000 | - | - | 79.27 | 80.96 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json) | +| DANet | R-101-D8 | 769x769 | 80000 | - | - | 80.47 | 82.02 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DANet | R-50-D8 | 512x512 | 80000 | 11.5 | 21.20 | 41.66 | 42.90 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json) | +| DANet | R-101-D8 | 512x512 | 80000 | 15 | 14.18 | 43.64 | 45.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json) | +| DANet | R-50-D8 | 
512x512 | 160000 | - | - | 42.45 | 43.25 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json) | +| DANet | R-101-D8 | 512x512 | 160000 | - | - | 44.17 | 45.02 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DANet | R-50-D8 | 512x512 | 20000 | 6.5 | 20.94 | 74.45 | 75.69 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-101-D8 | 512x512 | 20000 | 9.9 | 13.76 | 76.02 | 77.23 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-50-D8 | 512x512 | 40000 | - | - | 76.37 | 77.29 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json) | +| DANet | R-101-D8 | 512x512 | 40000 | - | - | 76.51 | 77.32 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfb9bdb3064275c2ac3bf2a057ef8eb79c308df --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff 
--git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d80b2ec160ae1c41499d45242713a99122d8adf8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..0f22d0fb6362252ac02f3f152a42997c68b90343 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..709f93cba3e3bca6ce0635457ab1823b04123bf8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..5c623eb56836760694b50f3e4e66aa0f1fc069df --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bd31bc8f283fe8c322ee4876deadb89569dc1743 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..597d76de79610780b03cd91dba5f3a4f10147bcd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_40k_cityscapes.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..70f9b31966128e8d9ec37859f57a7edfd8e6d1b2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b70c5b8d49f04661e23604ca4da56a82b1b99c9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..03734310d7338c75d48c914cb325500961c04a79 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..22aaf857c3212d0b36b0b04e7990616025a3ef9b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..010f86f1aac1b5c827dec29f692d137dc1c399bf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cef0f09bfa2290d14fc3a783ea500d6c3da2931
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/danet_r50-d8.py',
+    '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..154e84890ed73fe4813dddc8c321de6cd2854fc1
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+    '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c5b94e5a27d7f902d4bdea7ef6c4ef0b816bb99
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/danet_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7237ae03c601204dc7c03018ca17ed363090569
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/danet_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
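The 769x769 configs above (for DANet here, and likewise for the APCNet and CCNet variants earlier in this diff) evaluate with `test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`, i.e. overlapping sliding-window inference rather than whole-image inference. A minimal sketch of how such windows tile an image, using the crop and stride values from these configs (an illustration of the idea, not the repository's implementation):

```python
# Sketch of sliding-window ("slide") test inference positions, assuming the
# crop/stride values from the 769x769 configs above; illustrative only.

def slide_windows(h, w, crop=(769, 769), stride=(513, 513)):
    """Yield (y1, x1, y2, x2) windows covering an h x w image."""
    ch, cw = crop
    sh, sw = stride
    ys = list(range(0, max(h - ch, 0) + 1, sh))
    xs = list(range(0, max(w - cw, 0) + 1, sw))
    # shift in one extra window if the bottom/right border is not yet covered
    if ys[-1] + ch < h:
        ys.append(h - ch)
    if xs[-1] + cw < w:
        xs.append(w - cw)
    for y in ys:
        for x in xs:
            yield y, x, min(y + ch, h), min(x + cw, w)

# a 1024x2048 Cityscapes image is covered by 2 x 4 = 8 overlapping crops;
# logits in the overlaps are averaged before the final per-pixel argmax
print(len(list(slide_windows(1024, 2048))))  # 8
```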
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4994f6469051efd4881546acfcefef76616332e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/README.md
@@ -0,0 +1,66 @@
+# Rethinking atrous convolution for semantic image segmentation
+
+## Introduction
+
+[ALGORITHM]
+
+```latex
+@article{chen2017rethinking,
+  title={Rethinking atrous convolution for semantic image segmentation},
+  author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig},
+  journal={arXiv preprint arXiv:1706.05587},
+  year={2017}
+}
+```
+
+## Results and models
+
+Note: `D-8` here corresponds to the output stride 8 setting for the DeepLab series.
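+
+For intuition, the output stride is controlled by the backbone's per-stage `strides` and `dilations` (compare the `R-101-D16-MG124` configs later in this directory, which switch to output stride 16). A hypothetical pair of backbone overrides, shown only to illustrate the difference:
+
+```python
+# Illustrative only, not a config from this diff. A D-8 backbone keeps the
+# last two ResNet stages at stride 1 and dilates them, so features stay at
+# 1/8 input resolution; D-16 only dilates the last stage (1/16 resolution).
+backbone_d8 = dict(strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))
+backbone_d16 = dict(strides=(1, 2, 2, 1), dilations=(1, 1, 1, 2))  # as in the D16-MG124 configs
+```
+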
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------|
+| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | 79.09 | 80.45 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) |
+| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | 77.12 | 79.61 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) |
+| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | 78.58 | 79.89 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) |
+| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | 79.27 | 80.11 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) |
+| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | 76.70 | 78.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) |
+| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | 79.32 | 80.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) |
+| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | 80.20 | 81.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) |
+| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | 76.60 | 78.26 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) |
+| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | 79.89 | 81.06 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) |
+| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | 79.67 | 80.81 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) |
+| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | 6.96 | 76.71 | 78.63 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) |
+| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | 78.36 | 79.84 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) |
+| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) |
+| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) |
+| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 |
80.01 | 81.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | +| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 76.63 | 77.51 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | +| DeepLabV3 | R-101b-D8| 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | 42.42 | 43.28 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | 44.08 | 45.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 42.66 | 44.09 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | +| 
DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 45.00 | 46.66 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | 76.17 | 77.42 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f20f260e23a95dfee9dfdceef9badab992246f53 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..de4a8a5e9f030f1e8a8802596885186163f23eed --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5256f7b7b053cbe8d9e4ca2ec6139bb02387f6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..001b7a69c15299fc1fe5b269a5accf92c5ece032 --- /dev/null 
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8c707c79d659bc544d242352bcb29686eb40b004 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6804a5781369d1031f179d421a3b5a160fd575d3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..df6f36ef7c3b71ba7979aa7a1b226b3e3ebd9bb4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..40f5f62373e59d1c6c01ca3f57777698461127c9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..fb2be22f8bc2e10cdfba4f58b2ad1ced913b4ea4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..796ba3fb142394c4d93a29ba57548dca59d8d02b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d58a67b3b4dddf3da42efca30fa599e623f183 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..13094a98ee9be3cf8c88370e1e111cb4dde03ec4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5186bf614bc9ebffe47323ea61afbc9604be265b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d185db95adc61734f11f0dcd7b6c45aa652680b0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..e084e95c70b0b7b0c9dcc3388d6b7d3d51d54b6d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a990c076536ad9455a9203f5b6a60157f2f2f99f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b25e725ed98324e6ea648567740dc67e0413b4f9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fd920f0ca7c690d3d1c44f5f7be1cbea18fa14d4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..9d493ef527bb161be98d0e4ea433104b3bb9ff48 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + 
test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..71a0fda48aa2538e4d913e73e94a71564377ea50 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8e7420d24a20b662286266cac58cab4721dc8df3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..132787db98d3fc9df5ed62e31738c82da8c279bf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a9d4e1b9123b3c965cd430237ce9fcc7018a11 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f62da1a8090da389a77d77a9887926af2a7ded49 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..492bd3dfdce331070cb9645dbe55142e9b662da1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..78f4d0d9de3d6b8dd2b097531317956d8e3b19f1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e35d1988f0bb7ad47a73ef1a64b73d9b40e0ba40 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..dd7c16580d0620bc854f2c6eb7c881bdcd23020a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 
513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e742d9a5ec2b6addf829cb802de27ea1afd53301 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..332d9cfb79fb698c7867f0f80053c1fd29bf2c1d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dc02660428fe534605ae3bf9659382c282379a4e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/README.md @@ -0,0 +1,68 @@ +# Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} +``` + +## Results and models + +Note: +`D-8`/`D-16` here correspond to the output stride 8/16 setting for the DeepLab series. +`MG-124` stands for multi-grid dilation in the last stage of ResNet.
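For concreteness, the backbone override that implements this `D-16`/`MG-124` setting appears in the `deeplabv3plus_r101-d16-mg124_*` configs added later in this diff; a minimal sketch is reproduced below, with inline comments that are an editorial reading of the fields rather than part of the original config:

```python
# Sketch of the D-16 + MG-124 override on top of the R-50-D8 base config
# (mirrors deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py in this diff).
_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(
        depth=101,
        dilations=(1, 1, 1, 2),  # dilate only the last ResNet stage
        strides=(1, 2, 2, 1),    # last stage keeps stride 1: stem /4 x 1*2*2*1 -> output stride 16
        multi_grid=(1, 2, 4)),   # MG-124: per-block dilation multipliers within that stage
    decode_head=dict(
        dilations=(1, 6, 12, 18),  # smaller ASPP rates to suit the stride-16 feature map
        sampler=dict(type='OHEMPixelSampler', min_kept=100000)))
```

The `OHEMPixelSampler` entry is part of the same upstream config: it computes the loss only over the hardest pixels, keeping at least `min_kept` of them, and is independent of the output-stride change.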
+ +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | 79.61 | 81.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | 80.21 | 81.82 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | 78.97 | 80.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | 79.46 | 80.50 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | +| DeepLabV3+ | R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | 76.89 | 78.76 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | +| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | 80.09 | 81.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | 80.97 | 82.03 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | 76.26 | 77.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | 79.83 | 81.48 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | 80.98 | 82.18 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | 79.09 | 80.36 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | 79.90 | 81.33 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | 75.87 | 77.52 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | +| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | 80.28 | 81.44 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | +| DeepLabV3+ | R-101b-D8| 512x1024 | 80000 | 10.9 | 2.60 | 80.16 | 81.41 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | +| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | 76.36 | 78.24 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | +| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | 79.41 | 80.56 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | +| DeepLabV3+ | R-101b-D8| 769x769 | 80000 | 12.3 | 1.10 | 79.88 | 81.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | 42.72 | 43.75 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | 44.60 | 46.06 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | 43.95 | 44.93 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | 45.47 | 46.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | 75.93 | 77.50 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | + +### Pascal Context + +| Method | Backbone | 
Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..bf39d2f12b719b1c91e38bef71f0f5232543b0dc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c53ec41baf9043029549b4893b2380372ea5ecd9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..68e2b072e4b8d076e8c3e929dfdc73bcd24ce859 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..3a46c28608add5325ec1decf33624c3c00bff1d7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6ce85aea5a960e76f8154a5319c7c52e98c4c45 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0ebbd3c70ee5e33c6ef4ae76b6c6a6ce828d07b4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a75c9d3019b13d01c0dd13dae53bce3d15791d52 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 
0000000000000000000000000000000000000000..ebb1a8eaee16de7443ab3e79e02a37340de511d7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3caa6cf8ae61d467628378d99a919c9db1253b91 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..53fd3a909585367ca59eb827c2fbbab4cdf234ea --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c92eb26f8fead94f5ad7ac7d7fb60d92c57114 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea9cdb5b639e5284cd46e02ce1b67b4729950f7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..398d9759cafc1d01e78c138abd249808531a97b9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ 
+_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..136449083f7a9efbad6df94f1acd04170147aaba --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..aff70c93e6142ddda3a874d9dfd57ec6c4cd89b3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0172d9a87d6dc1c75bf75a9c48363eb985d389a8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b90b292b03a80aa37b8ca236746cf7cddc4ac27e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b49da3581d9697e726e114b1564fc58a55ef1099 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..318845de1e2124a4dff3348749ec5a13d78d686f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..1736c2397a9b2a4b4fb12eee8175e5ee98eaf805 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7243d0390f6394fdd528c881bb128b2c13d08037 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py new file 
mode 100644 index 0000000000000000000000000000000000000000..3304d3677f5357f1a3e343b39fcd97b238abdb5e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1491e3b8247c9d163d6016caf2fcd8043a053b7e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1056ad4d1e2a4f956d12f6daf506620fab27dd17 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..e36c83ba601884b81c06ee69445a94e76224c828 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..352d870bc8eab11974640c4b2d9c80dc6fbbaaf2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e4bda3eded693bfd44a8c86ced7ae6ee9963c583 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1420b97a4bd0dc0f5451623697666012a2de635c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8e1da9c7b1d86bc8a0c834bbede9d0fd40acf5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c0ba019136c2e4f33b015be3d82505bee2066655 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9b12c8d862fb7b7633c5b2f4a1c357803abdcd32 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/README.md @@ -0,0 +1,39 @@ +# Dynamic Multi-scale Filters for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex 
+@InProceedings{He_2019_ICCV, +author = {He, Junjun and Deng, Zhongying and Qiao, Yu}, +title = {Dynamic Multi-Scale Filters for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, +month = {October}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | 77.78 | 79.14 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | 78.37 | 79.72 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | 78.49 | 80.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | 77.62 | 78.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.07 | 80.22 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.67 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| DMNet | R-50-D8 | 769x769 | 80000 | - | - | 79.22 | 80.55 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| DMNet | R-101-D8 | 769x769 | 80000 | - | - | 79.19 | 80.65 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | 42.37 | 43.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | 45.34 | 46.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| DMNet | R-50-D8 | 512x512 | 160000 | - | - | 43.15 | 44.17 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| DMNet | R-101-D8 | 512x512 | 160000 | - | - | 45.42 | 46.76 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6897691d3f8f200783fae7bfe231735f25a11b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..116cbdcede32bf24ad95f04291e98754011172c9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..d78d46c040f75d16225307d4b4151b87e6e3db29 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9713b731a47df9c5e23d26a08ad17d03a0d5e9fe --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6b222e730073dd42df618db5660ee9d4117f3956 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f36d490e9c9b31de7eedf735d2712e55f35db998 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1f9a917fa4223bd2428f2b2d10eac446f7ecc71a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', 
'../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b38f90dc4318f23d32971e7afbf90a327768f2d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a8fbd9beb11f3d1308ce2cd12da2a177c2d39478 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..74f6d6a85a06e96580a3c8d5843f660c85bca5ad --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..19841547a42315164de547a4121cfd64739cf24b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..31d95f96eb10025c2ad054cde4c81f47db21f0f2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', 
'../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..172dfe1a0f07646c5f8cc47ddd62f5cb6da85a55 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/README.md @@ -0,0 +1,42 @@ +# Disentangled Non-Local Neural Networks + +## Introduction + +[ALGORITHM] + +This example reproduces ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation and is still a work in progress. + +## Citation + +```latex +@inproceedings{yin2020disentangled, + title={Disentangled Non-Local Neural Networks}, + author={Minghao Yin and Zhuliang Yao and Yue Cao and Xiu Li and Zheng Zhang and Stephen Lin and Han Hu}, + year={2020}, + booktitle={ECCV} +} +``` + +## Results and models (in progress) + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|---------:|----------------|------:|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| dnl | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | 78.61 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| dnl | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | 78.31 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| dnl | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | 78.44 | 80.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | +| dnl | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | 76.39 | 77.77 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | +| dnl | R-50-D8 | 512x1024 | 80000 | - | - | 79.33 | - | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| dnl | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| dnl | R-50-D8 | 769x769 | 80000 | - | - | 79.36 | 80.70 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | +| dnl | R-101-D8 | 769x769 | 80000 | - | - | 79.41 | 80.68 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DNL | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | 41.76 | 42.99 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNL | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | 43.76 | 44.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNL | R-50-D8 | 512x512 | 160000 | - | - | 41.87 | 43.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | +| DNL | R-101-D8 | 512x512 | 160000 | - | - | 44.25 | 45.78 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1a36e3c80a13f91e37e4d90b7ae47c7e0d204144 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0f2e1b6da7e63841f4429b1caed5fbe9d537c4f8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..aca44e478b67d5a226681c099e64fe67d93cf39b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd27a1d1c6bf0e983fafed2e5659701dadb8f24 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..575e9d01343a4563e0d3ba89b361ea8e358d2dee --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4f1b9e19411eb963d16fd2a8174529e69ecd5a1a --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f7aa7444d4c8022563db642478beec4dc5ab0dab --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fdff93f543af6bac93949e68532daea45e437167 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5305689d09b944f6e37aa85567ce3f29fc6974a7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..09604c39729abfc9015eb971069b987c8d8a82cb --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0666199b63e604b09fe8187c378589c25d0d311b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + 
'../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b07c4f47629c07faa013b9d1eae3462d898c6f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) +optimizer = dict( + paramwise_cfg=dict( + custom_keys=dict(theta=dict(wd_mult=0.), phi=dict(wd_mult=0.)))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..40df946ed446cf862847dad084324412e5aa52ee --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/README.md @@ -0,0 +1,26 @@ +# Expectation-Maximization Attention Networks for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{li2019expectation, + title={Expectation-maximization attention networks for semantic segmentation}, + author={Li, Xia and Zhong, Zhisheng and Wu, Jianlong and Yang, Yibo and Lin, Zhouchen and Liu, Hong}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={9167--9176}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|---------:|----------------|------:|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | 77.59 | 79.44 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | 79.10 | 81.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | 79.33 | 80.49 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | 79.62 | 81.00 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..58f28b43f55f54c7a604960735963e6b7c13b6f1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c5dbf20b0fcc7bc1dd077bd8b7077772251d4c1a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..73b7788bf924be2e1588596a88f0155ddc37358e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..699aa212c3518901b2f84db3f062c16b023c7538 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/README.md 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6ba42f69fae5e52254b34195b6cd0ed689c5bf6c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/README.md @@ -0,0 +1,39 @@ +# Context Encoding for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@InProceedings{Zhang_2018_CVPR, +author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, +title = {Context Encoding for Semantic Segmentation}, +booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| encnet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | 75.67 | 77.08 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | +| encnet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | 75.81 | 77.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | +| encnet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | 76.24 | 77.85 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | +| encnet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | 74.25 | 76.25 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | +| encnet | R-50-D8 | 512x1024 | 80000 | - | - | 77.94 | 79.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | +| encnet | R-101-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.47 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | +| encnet | R-50-D8 | 769x769 | 80000 | - | - | 77.44 | 78.72 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | +| encnet | R-101-D8 | 769x769 | 80000 | - | - | 76.10 | 76.97 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| encnet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | 39.53 | 41.17 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | +| encnet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | 42.11 | 43.61 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | +| encnet | R-50-D8 | 512x512 | 160000 | - | - | 40.10 | 41.71 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | +| encnet | R-101-D8 | 512x512 | 160000 | - | - | 42.61 | 44.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f34373d9ebab5ef6f4c01e3eab8a97c288495be0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0207b3144460d25229e3ac4c4d0d9fc1d34292 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..8fec6ba255f33d48a66a831de4571346a7a2bd2e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..c264af998b5ef6a9e521db204205fb998cce68a9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..8a6968ea583758191fa8e94497c7186e653c7afb --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..94151004ea88394373cf8f135b065d5056b11179 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff 
--git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6ade67b76ce04e1ede3ff99aab4863705cff446 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..55648c08b2c4eb78d7d5ae65482e5e5b291c058a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4ea6ed0e84f3aa7d2c7acd8dd5c459a8cd3ce45c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d2feeef7e982550481365f8187cb1a50f0fafcc9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..2a5dc203cc793860aae7743d16c4fb9a564ad1d8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 
0000000000000000000000000000000000000000..9cb7952cede58165d2ed0f35d2208ad1ffb65232 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..81f3cbfbf516e833821c49deecd8f167170021f0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..835375cb0447378fc76431158eb0b8fc011d36bc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d311e33f56ba431a882b0e7079001b0e9932a011 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7b535f3c80818ce6b692b66f18ceee8e7b181fdc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), 
stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..600b701a7194ead496cc924bee897b6096e1c7ca --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(stem_channels=128), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb87a9f7aeb8d3630aeccc04b585a4dfec9f2b7e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/README.md @@ -0,0 +1,22 @@ +# Fast-SCNN for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@article{poudel2019fast, + title={Fast-scnn: Fast semantic segmentation network}, + author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto}, + journal={arXiv preprint arXiv:1902.04502}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|-----------|-----------|--------:|----------|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Fast-SCNN | Fast-SCNN | 512x1024 | 80000 | 8.4 | 63.61 | 69.06 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-f5096c79.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-20200807_165744.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3d9c9999370c8b1c28af3063a3aded0d88c91caf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +# Reconfigure the data sampler. +data = dict(samples_per_gpu=2, workers_per_gpu=4) + +# Reconfigure the optimizer. 
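+# Editorial note (a hedged assumption, not text from the upstream authors):
+# Fast-SCNN is, to the best of our knowledge, trained from scratch here (the
+# base model config defines no ImageNet-pretrained weights), which is
+# presumably why this schedule can use a much larger base learning rate
+# (0.12) and a smaller weight decay (4e-5) than the ImageNet-pretrained
+# ResNet configs in the neighboring directories.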
+optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95ca2ac0439c3a33ef13f2e22d7fa7a83754142b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/README.md @@ -0,0 +1,66 @@ +# Fully Convolutional Networks for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@article{shelhamer2017fully, + title={Fully convolutional networks for semantic segmentation}, + author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={39}, + number={4}, + pages={640--651}, + year={2017}, + publisher={IEEE} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | 72.25 | 73.36 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | +| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | 75.45 | 76.58 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | +| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | 71.47 | 72.54 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | +| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | 73.93 | 75.14 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | +| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | 71.11 | 72.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | +| FCN | R-50-D8 | 512x1024 | 80000 | - | - | 73.61 | 
74.24 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | +| FCN | R-101-D8 | 512x1024 | 80000 | - | - | 75.13 | 75.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | +| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | 70.80 | 73.16 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | +| FCN | R-50-D8 | 769x769 | 80000 | - | - | 72.64 | 73.32 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | +| FCN | R-101-D8 | 769x769 | 80000 | - | - | 75.52 | 76.61 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | +| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | 70.24 | 72.77 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | +| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | 75.65 | 77.59 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | +| FCN | R-101b-D8| 512x1024 | 80000 | 9.1 | 2.73 | 77.37 | 78.77 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | +| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | 69.66 | 72.07 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | +| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | 73.83 | 76.60 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | +| FCN | R-101b-D8| 769x769 | 80000 | 10.3 | 1.15 | 77.02 | 78.67 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | 35.94 | 37.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) | +| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | 39.61 | 40.83 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | +| FCN | R-50-D8 | 512x512 | 160000 | - | - | 36.10 | 38.08 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | +| FCN | R-101-D8 | 512x512 | 160000 | - | - | 39.91 | 41.40 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | 67.08 | 69.94 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | +| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | +| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.14 | 45.67 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20200911_212515-9b565a6d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20200911_212515.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.47 | 45.74 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20200915_032644-a3828480.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20200915_032644.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..f3a15b41054318d508e98685632921f262029de0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..bdccfd99ba0c25646f02850483c2cdf679fdbf3d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7918dd10d05cd98dbc02f02ef1b93e3134f52357 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..528110dc73c15008869a9ad9851ef487f0c952c7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf6780f2c821052692ddcb904bd10e6256c1e71 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..09a5fe5468f0155f8fd0bf2cd1574a33624d8492 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..eafefaa67565513c277c5eb42e3661a88133cb27 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', 
backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6d0294530f4c817b352cb020d111e3248690ae1f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6b4cc571294fa45b4442c2bfeb9fda13a14fc5c2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3503c76935e294c881130b309999d32f13df8839 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b9bf60fc13364ca1b7b3842664950f653426e67 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f36eb02e68707d502cbe315ff8f6f25b232dee92 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5a1d29e480cb46a763cb17d2105b3f040153d417 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' 
+model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6644a58dea86fd38e208abbedffe4f836e677078 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..92accfc703fc398d2845d7dc2f1d5336f24738e8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5dd34dd2134c745275c66adc5488b4b9f68d6809 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..fdc6314f704e61d064f5fb7bdd30bc38a9e87ee5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0870f928b82b24b8179305f6c9fc7f6013fb481e --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..401c6ea7330d45d8f7604a1da63fc6e15faea424 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..990a085eda2f2dc47f1a1289bfbf2726ad8c9c4f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9ca7fd23cedc0567a015bd5f8641a509ead6110a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..17206a5171dcc357c589a1711afa52d87faeece0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..8cec429c3e27ad2543b7e38fa206e6606fda4d5a --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ef194cb594eb76316324066e23e48184d8cede27 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fca98c1d9ace73a61ae395914e5960832216bf67 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7d75cd9f49343355b14c7d60bb0df0936ffe0278 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..28ef13f8d17e977f710ba9a863f182b1f80dc8cf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..106f7b6a1ece974c9f732ee813724bd8bda3bef3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d12e4d78025313fb9689bb320b07520bd7648ef --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/README.md @@ -0,0 +1,25 @@ +# Mixed Precision Training + +## Introduction + +[OTHERS] + +```latex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-101-D8 | 512x1024 | 80000 | 5.50 | 2.66 | 76.80 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes-50245227.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230921.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | 5.47 | 2.68 | 79.46 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes-ade37931.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230919.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | 5.91 | 1.93 | 80.48 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes-bc86dc84.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | 6.46 | 2.60 | 80.46 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..60d8350e98605c2445ce8359ca9a7a1951fe0085 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c263d6907ec9ddfdad32b380a7d926d1391e393c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8100a8e64d780c1b9c272b57e4e171c5e1a4120b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..aefac2953abf65c10023599ec42c709a794935c8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
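The four fp16 variants above all follow the same three-line pattern: inherit a complete training recipe through `_base_` and override only `optimizer_config`. Below is a minimal sketch of that pattern (not itself part of this diff); it reuses the FCN-R50 Cityscapes base config added earlier and assumes mmcv's `Fp16OptimizerHook` is importable, as the configs above rely on:

```python
# Sketch only: an fp16 variant derived from a base config added in this diff.
_base_ = '../fcn/fcn_r50-d8_512x1024_80k_cityscapes.py'
# fp16 settings: replacing the default OptimizerHook with Fp16OptimizerHook
# turns on mixed precision training; the static loss scale of 512 multiplies
# the loss before backward so small fp16 gradients do not underflow.
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
```

Everything else (model, dataset, schedule, runtime) is inherited from the base file, which is why each fp16 config in this diff stays this short.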
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b840d5bf9f844e7af89803b48a43b76b887ced36 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/README.md @@ -0,0 +1,48 @@ +# GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{cao2019gcnet, + title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision Workshops}, + pages={0--0}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | 77.69 | 78.56 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | 78.28 | 79.34 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | 78.12 | 80.09 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | +| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | 78.95 | 80.71 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | +| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.48 | 80.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.03 | 79.84 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.68 | 80.66 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | +| GCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.18 | 80.71 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | 41.47 | 42.85 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | +| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | 42.82 | 44.54 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | +| GCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.37 | 43.52 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | +| GCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.69 | 45.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | 76.42 | 77.51 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | +| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | 77.41 | 78.56 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | +| GCNet | R-50-D8 | 512x512 | 40000 | - | - | 76.24 | 77.63 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | +| GCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.84 | 78.59 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..27bd9422dad49bc5a06f577ee45cd834bdbe3912 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7f0f83fe39da31fe9a5b497e0481e1c79a33e764 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 
0000000000000000000000000000000000000000..9888120f65b045df1c7d4d05fb010373abf82ccf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1b70ca8e46a0409379f5ae9809ce03de203426ad --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..b17c7a12b547ee4e1cd60d667c575eab06eb071c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a2183fc2db1ff188b0ad5418e55f71005da926cc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..08a6031f20234b1cc1d792ea5d4891613503a185 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5efb61339cdbdde585f7814e9650be2e2df654ac --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..610467c07204140bf604f8dda2aa57978c565ed3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..155e28f42194112703bb21473e5e3dd0fca40d49 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1549a4d5bf10cd3fd6e3bd57bf7a48e7e5e1ede8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a496204bdb061d975c40cb7ef2aaada40e020a13 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d85cf6550fea5da7cf1fa078eb4fa30e017166b4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 
index 0000000000000000000000000000000000000000..89d5e1ae0f3ef44626f3b5534c504cbce7389a32 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..332495d3d7f7d7c7c0e0aca4e379cd54e2ed07de --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6d9cb1c64bcf8c3e952b6f8adc11bec0403d106 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4d77cefe3e51091bc2264df8b201968addd81e8d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/README.md @@ -0,0 +1,59 @@ +# Deep High-Resolution Representation Learning for Human Pose Estimation + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | 73.86 | 75.91 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | 77.19 | 78.92 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | 78.48 | 79.69 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 75.31 | 77.48 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.65 | 80.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 79.93 | 80.72 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 76.31 | 78.31 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 78.80 | 80.74 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 80.65 | 81.92 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | 
mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | 31.38 | 32.45 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | 35.51 | 36.80 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145-66f20cb7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | 41.90 | 43.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 33.00 | 34.55 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | 36.79 | 38.58 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | 42.02 | 43.86 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | 65.20 | 68.55 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503-56e36088.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | 72.30 | 74.71 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | 75.87 | 78.58 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..5ff05aa595399d77ee51552c243e489f395a820e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..cf315a4f0e6f397768572c590a634cc1b9d298a9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9f04e935c39b08de66629f913b30675ffff2a8fe --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..99760c36d8399204ca8e35f32690bcd369676852 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a653dda19255214a1a412b645abddd3fc5c0d853 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + 
'../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..45ed99b6813324a58575f9bb74ce0534626e10c4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f06448b168af4d2dcc5a1f96e4430a7948b7e170 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d74e95943afca04ba4073e411e0b713985384129 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..52bc9f5e91f2fdf9ce8f9e3a873902dd8db56522 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..d09931048f762cd2ac224d62c2fe2ed8e0e148c8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + 
stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..584b7135fd95464f3d2c965440a0b92161cde09a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ddbe3801f99dc21120548af85c55c7cdcfadaea2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4e31d26e093b6cb2d59b24bb3060c92bd7dccdea --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ee2831d99d859c419b158b5f828d8a84063564ea --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py new file mode 100644 index 
0000000000000000000000000000000000000000..22a3ce0b38f36efc96595fe1c3ef428fc1575eb0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d0de5df75242e58ba572277d6fc5cf93675a097e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..409db3c628edf63cd40e002f436884ce1fb75970 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a8400979b1e94dd42343de656ffbc5fbb7a07944 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0e2d96cb6ce7249852cb1d9b36a2f24bdce00199 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 
96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..e28164e3dc9d321bf0a97b37f14f3184f95a27a5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..394a61c99f038c94fce58ac9c422b7c3ee4b5f50 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d37ab1d09ef51b1321ed8b3634fd99445efee543 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a9bab32b52ca41155062c7655986ed84677a8280 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..dff4fea85ced568c38d39408d459697e88ca0faa --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a8d1deb98659d05755c6316c2aff2295afb0bb9c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1084a57e978195df6d45a9a00415953ddbaeeb51 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7eca7fa4b8102c6225af3b484ffff5bdc7c0f201 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0e75e028db703129551402ae9d63c6a2861dc4b --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/README.md @@ -0,0 +1,35 @@ +# MobileNetV2: Inverted Residuals and Linear Bottlenecks + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 61.54 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | 19.71 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| PSPNet | M-V2-D8 | 512x512 | 160000 | 6.5 | 57.7 | 29.68 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| DeepLabV3 | M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | 34.08 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | 34.02 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..267483d88ff25d75dc18c5c2d37375cd77c9639c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e15b8cc82b09ac3e64875936cdfd0f663aaba936 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ 
+_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d4533d79a25771905d7f1900bf7b34037885a77a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7615a7c19a3f19635b71801a55e4544be4d215b5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a535bd0ed8a4883134acdc52cf3f77c8d897ce82 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..c5f6ab0d62e269e44dac016eb5ac58f49c1fa292 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = 
'../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7403bee864d833bcc31160665e4b54fdd738cc13 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5b72ac830be29b865ed52adaf41f2fe800f252cc --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2bad2a731c63ba51ee05518af893618cf7ed94a0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/README.md @@ -0,0 +1,28 @@ +# Searching for MobileNetV3 + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{Howard_2019_ICCV, + title={Searching for MobileNetV3}, + author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and Le, Quoc V. 
and Adam, Hartwig}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + pages={1314--1324}, + month={October}, + year={2019}, + doi={10.1109/ICCV.2019.00140} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | 69.54 | 70.89 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json)| +| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | 67.87 | 69.78 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json)| +| LRASPP | M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | 64.11 | 66.42 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json)| +| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | 62.74 | 65.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json)| diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e59a78b48be3a0997a31524fd78e7fad5636bc82 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict(pretrained='open-mmlab://contrib/mobilenet_v3_large') + +# Re-config the data sampler. 
+data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c5435142db6b1f81421f5fd96d07ece32b5f38 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +# Re-config the data sampler. +data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d4e368b2a11ed6433d8f2594a2cc3184fe5ddfff --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,23 @@ +_base_ = './lraspp_m-v3-d8_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://contrib/mobilenet_v3_small', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0c5f707200c5d8b6d39493762baf59023dcaad11 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,22 @@ +_base_ = './lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/README.md new file mode 100644 index 0000000000000000000000000000000000000000..76352e265a9db69640c650e192f66ab75b1f19b3 
--- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/README.md @@ -0,0 +1,48 @@ +# Non-local Neural Networks + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{wang2018non, + title={Non-local neural networks}, + author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7794--7803}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|----------|----------|-----------|--------:|----------|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| NonLocal | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | 78.24 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocal | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | 78.66 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocal | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | 78.33 | 79.92 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | +| NonLocal | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | 78.57 | 80.29 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | +| NonLocal | R-50-D8 | 512x1024 | 80000 | - | - | 78.01 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | +| NonLocal | R-101-D8 | 512x1024 | 80000 | - | - | 78.93 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | +| NonLocal | R-50-D8 | 769x769 | 80000 | - | - | 79.05 | 80.68 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | +| NonLocal | R-101-D8 | 769x769 | 80000 | - | - | 79.40 | 80.85 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| NonLocal | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | 40.75 | 42.05 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | +| NonLocal | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | 42.90 | 44.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | +| NonLocal | R-50-D8 | 512x512 | 160000 | - | - | 42.03 | 43.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | +| NonLocal | R-101-D8 | 512x512 | 160000 | - | - | 43.36 | 44.83 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422-affd0f8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| NonLocal | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | 76.20 | 77.12 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) | +| NonLocal | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | 78.15 | 78.86 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) | +| NonLocal | R-50-D8 | 512x512 | 40000 | - | - | 76.65 | 77.47 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) | +| NonLocal | R-101-D8 | 512x512 | 40000 | - | - | 78.27 | 79.12 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7b06dd3806c1d93be41943ab4d7d49f68ac830 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7a1e66cf1c239eac3c6a4876a35d82e7b6ccec2e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..df9c2aca9c7c1999d74a08a58aca5d220f7df54a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..490f9873a29f2626ad764825eec97f16ee7f9f96 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..40d9190fba223251b794c105b036e4794865f785 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..0c6f60dac7b457d3b936a5f7f43eb84713c77e05 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..23e6da7f23180c2350253ea400f444c0c3064fd6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0627e2b5a76dead859212d4cab116c160df21404 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9d4dc7390370d0ffe21e7dcb686eeff7261952c4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b0672b687ade8d554b71fdf0bc54de9f024fa30c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..b1adfbab882d9825a3f348ed99e401d1f164cd11 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..2e808d8072f34d09a7b0859f90261dd66c8815dd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..66b443abec3282242c0f794a2f91e066596e7ee9 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..8a7a2f509ba6627ad5ab972ac090362bbcd2ecb7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..75adef324877d56c157b457eecbf8446aa6b192f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a0726c293d6026898110f7fa55d5e7d2d55d7a02 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0a4c75c708330be9163dae675b0c1f84d7722728 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/README.md @@ -0,0 +1,69 @@ +# Object-Contextual Representations for Semantic Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@article{YuanW18, + title={Ocnet: Object context network for scene parsing}, + author={Yuhui Yuan and Jingdong Wang}, + journal={arXiv preprint arXiv:1809.00916}, + year={2018} +} + +@inproceedings{YuanCW20, + 
title={Object-Contextual Representations for Semantic Segmentation}, + author={Yuhui Yuan and Xilin Chen and Jingdong Wang}, + booktitle={ECCV}, + year={2020} +} +``` + +## Results and models + +### Cityscapes + +#### HRNet backbone + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | + +#### ResNet backbone + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------|----------|-----------|----------------|------|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | 80.09 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes-02ac0f13.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | 80.30 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes-db500f80.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | 80.81 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes-78688424.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | 
35.06 | 35.80 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | 37.79 | 39.16 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | 43.00 | 44.30 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 37.19 | 38.40 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | 39.32 | 40.80 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | 43.25 | 44.88 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | 71.70 | 73.84 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | 74.75 | 77.11 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | 77.72 | 79.87 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 72.76 | 74.60 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | 74.98 | 77.40 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | 77.14 | 79.71 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1c86eba17c46a863091d999b1a090e1237202ec5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..2c73b3839c8c1bc859eb3b8864256a00cfd022fe --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..506ad9319a9418f50650c477698c9b5cb9bf6663 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', 
'../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c86e18ea65c6aaa36a4fb6e2708f08c7ae1698 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..ab9d6446c9089bfae533b9dcd66e1352d81f74d0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..df79a9cf13963d26384b00ced0cf5efa9f68a420 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + 
'../_base_/schedules/schedule_40k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad67722a50c2b2ece5fcb7f0dd1819061ff6b3e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fc7909785f743071cad2cd1032000405435f81d4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..923731f74f80c11e196f6099b1c84875686cd441 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = 
'./ocrnet_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..be6bf16a2fd234f3526bf8fb8c30179f1ef9df78 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..81f3d5cb91607134bb1d844d78df7a3c411c134d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..ceb944815b5a979ddb72015295375f6fe0c31a89 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..70babc91c99eb99ee4f941b34ea886236531832e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..36e77219ac2d7ee6795db7c40ad7341749a3b1c7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c094391b1dfcef2fa6278f0c181fb50c303f7a4c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0aada9d8dcd792fd4fc7da8908cc11d44a9ff521 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 
192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b2e0094393151fa8975a0d53c48b6048b7e1929 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..3b3e8af9538e6ce3c929a902e3d1ee5be53469a5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py new file mode 100644 
index 0000000000000000000000000000000000000000..c2dd6d1158bd31ecdd7874827fd37bffb5d26db6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..89e6309f55f6b939f7d79271513da4934bbacbb6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..04971226eb0fd6461b715358ac955dfb78102992 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 
96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3dd70b74a0bf912d8a6fd39f1f26be7f7571ccd6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e34f3432e581ff506c9d2951c98b5aad7b1be6a5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..33d96c76f68b92217ed38afe9538144dfedc4fd2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0dea3e31f8c1eb3da33251fa1e10227ae98561e3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/README.md @@ -0,0 +1,32 @@ +# PointRend: Image Segmentation as Rendering + +## Introduction + +[ALGORITHM] 
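+Like every config family added in this change, the PointRend configs below rely on mmcv-style `_base_` inheritance: a derived file lists its parent configs and overrides only the fields that differ, with nested dicts merged recursively rather than replaced. As a minimal sketch (assuming the `mmcv.Config` API this codebase builds on; the path is one of the files added in this diff), a merged config can be loaded and inspected like this:
+
+```python
+# Minimal sketch, not part of the checked-in code: load a PointRend config
+# added below and inspect the merged result. Assumes mmcv's Config API,
+# with paths relative to the SegFormer directory of this repo.
+from mmcv import Config
+
+cfg = Config.fromfile(
+    'configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py')
+
+# _base_ files are merged recursively, so model, dataset, and schedule
+# settings from all parent configs are visible on the final object.
+print(cfg.model.decode_head[1]['type'])  # 'PointHead', the refinement head
+print(cfg.lr_config.warmup, cfg.lr_config.warmup_iters)  # 'linear' 200
+```
+
+The same merge semantics explain the two-line R-101 variants throughout this diff: `model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))` overrides exactly those two fields, and everything else is inherited unchanged from the R-50 parent config.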
+ +``` +@misc{alex2019pointrend, + title={PointRend: Image Segmentation as Rendering}, + author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, + year={2019}, + eprint={1912.08193}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | 76.47 | 78.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | +| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | 78.30 | 79.97 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|---------:|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | 37.64 | 39.17 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | +| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | 40.02 | 41.60 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a8c14c8cf91d7cbcc05065a6dc387101dff8cdf6 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1f8c8154431b056fb8371772f03dfa49ac1ad3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..96cbaa48d61ee208117d074e9f06bf4218407d78 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..db8c634c0f889c69ce80f86c445c493dcfdbd3c8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) +]) +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fcb24103b8e2dd649c0ad8938319f201e3254d19 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/README.md @@ -0,0 +1,48 @@ +# PSANet: Point-wise Spatial Attention Network for Scene Parsing + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{zhao2018psanet, + title={Psanet: Point-wise spatial attention network for scene parsing}, 
+ author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={267--283}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | 77.63 | 79.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | +| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | 79.14 | 80.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | +| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | 77.99 | 79.64 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | +| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | 78.43 | 80.26 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | +| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | 77.24 | 78.69 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | +| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | 79.31 | 80.53 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | +| PSANet | R-50-D8 | 769x769 | 80000 | - | - | 79.31 | 80.91 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | +| PSANet | R-101-D8 | 769x769 | 80000 | - | - | 79.69 | 80.89 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | 41.14 | 41.91 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | +| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | 43.80 | 44.75 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | +| PSANet | R-50-D8 | 512x512 | 160000 | - | - | 41.67 | 42.95 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | +| PSANet | R-101-D8 | 512x512 | 160000 | - | - | 43.74 | 45.38 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSANet | 
R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | 76.39 | 77.34 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | +| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | 77.91 | 79.30 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | +| PSANet | R-50-D8 | 512x512 | 40000 | - | - | 76.30 | 77.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | +| PSANet | R-101-D8 | 512x512 | 40000 | - | - | 77.73 | 79.05 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..69d212f158552cf5a24f62174b24a9d4976477bb --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..bc25d6aaf67ccb7e9fcb44ba2d803bebfa31b160 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7f6795e5ef0e4bf1d10ee7ed4f608bf93ac24216 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1a3c43495bbf9d302216d7ddf62df75446907a36 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f62eef9773ddf41d996104de571bcda00c488e14 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f8865a7c4d795d9de3f5bc6b762b305b3cabc22f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ffc99f010903267fc7c1893f4a6b0dcd2cbe42e6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6a9efc55ad2062facf3a568f8cdbba76c8c55950 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6671fcb4bf8430bc0128cd93a4b8cedea1856b03 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a441013a4c1adc39fc064dbac23caaac9efdc4a6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9c6364eb43e2abc95011205b569627ff9367d0e5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..af06cb66cc808c206d6946a4b2420a6942d3dc7e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..803c42da35eda861bf32ce0e7866cdc9fad96d0d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..0141a6d0925c2a2aa37517670a9f12ac7d3a02d4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + 
decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..690f8b5ef359be8a8be3a2d768aede24216a8706 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0966b4770cc649e95525c366b09801408b99567a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..931cad900680281d6970626b511124704e954c43 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/README.md @@ -0,0 +1,62 @@ +# Pyramid Scene Parsing Network + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{zhao2017pspnet, + title={Pyramid Scene Parsing Network}, + author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, + booktitle={CVPR}, + year={2017} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | 77.85 | 79.18 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | 78.34 | 79.74 | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | 78.26 | 79.88 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | +| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | 79.08 | 80.28 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | +| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | 74.87 | 76.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.79 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.76 | 81.01 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | +| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | 75.90 | 77.86 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | 79.59 | 80.69 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | +| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | 79.77 | 81.06 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | +| PSPNet | R-18b-D8 | 512x1024 | 80000 | 1.5 | 16.28 | 74.23 | 75.79 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | +| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | 78.22 | 79.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | +| PSPNet | R-101b-D8| 512x1024 | 80000 | 9.5 | 2.76 | 79.69 | 80.79 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | 74.92 | 76.90 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | +| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | 78.50 | 79.96 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | +| PSPNet | R-101b-D8| 769x769 | 80000 | 10.8 | 1.17 | 78.87 | 80.04 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | 41.13 | 41.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | 43.57 | 44.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 42.48 | 43.44 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 44.39 | 45.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | 76.78 | 77.61 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) 
| + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5a990604a77238375cb6d2b8298a382a457dd6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..fda9110603d71e14cab6e537949be191f2adf6db --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..38fee11bc23d8c92c529acd0c02a68204e34ab91 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9931a07bc2d137eb49b3fa4dad8f8681d4f5e943 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6107b41544378ad371cee95ee5ebc2e98ccbd9ad --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..2221b202d6c53c4b04f2431d3344379cbfe06dd7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..15f578b6002c481ada06befc3ea66accbbdd1f66 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..fb7c3d55d57b09296ea24889b218f9a0fb997463 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c6e7e58508f31627766b8ab748bd81cd51c77eca --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..59b8c6dd5ef234334bcdfa3d5e3594b7a9989b17 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8a3d3e3fcc12dd41223af190e2ae04f14d1cb8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1a7cb708e551e90a12ad4267e2af6938c353f0ba --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d914f93c023a6384e0e856b8608280cef589d5c6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5893e66a41cad73e8fb24aa58dc78ef002aecca5 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..abeeedf84387d7846a8a2c10480b94c9d8405559 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..284be6d09af1806b99bee5b85286b55ce02e8cbd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..86da94de5b32576f04240a2d02dfeccc0d6ddd45 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..cbb02714b9e252bab38b3f9d9095dabe570b9005 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5deb5872b00a30d5c18a980c4d6c1b0d915908b9 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4e9972849d6899fe43f435284d0e0b1bc3b0e7a9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..86584573a3d1afac73041b85516112ac21f1f17c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..cd88154d5e0be1a519e973331e0a14ae8a7de13e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c20c12f6bcf04b732dccaa4bfdba10bd10b5e6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..52efdf51d7d66c3205c1448c45ae281649a0901e --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..145cadb24016eeea87fccff8171c5b0dfb78f7ab --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..23a81eb7ef56a4cd8e7c9da65b86f3d0e562001a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..946bf4fc84236942a4462c2daa7637cace4e90cf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b6087dcf9f7cc04e12a2b9bcbde7abc4a56e972e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31bac01ec9f659d6c30f220a104c385326d3f04b --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/README.md @@ -0,0 +1,34 @@ +# ResNeSt: Split-Attention Networks + +## Introduction + +[ALGORITHM] + +```latex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | 77.56 | 78.98 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | 78.57 | 79.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | 79.67 | 80.51 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | +| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | 79.62 | 80.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | + +### ADE20k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|------------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 
12.86 | 45.62 | 46.16 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | 45.44 | 46.28 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | 45.71 | 46.59 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | +| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | 46.47 | 47.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f98398690eb3e1e77975d7fb94ea865424aa331b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e3924ad679cb3d7ba731322f9cdb67410baae59a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..69bef7238345cf6aabb126012af992602f910287 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py 
@@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..d51bccb965dafc40d7859219d132dc9467740a1b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..33fa0252d8b4cc786f1297605c169ee6068195a4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..dcee8c280e833825f84b944c6db21e9a43125e06 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9737849cbd7470b03ef3fcb3b1225283370eb503 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6a622eae963401e143004a62ff53071ddbf61c01 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c73ade624817b61be2f226c6fae03d0b023c570a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/README.md @@ -0,0 +1,35 @@ +# Panoptic Feature Pyramid Networks + +## Introduction + +[ALGORITHM] + +```latex +@article{Kirillov_2019, + title={Panoptic Feature Pyramid Networks}, + ISBN={9781728132938}, + url={http://dx.doi.org/10.1109/CVPR.2019.00656}, + DOI={10.1109/cvpr.2019.00656}, + journal={2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + publisher={IEEE}, + author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Dollar, Piotr}, + year={2019}, + month={Jun} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|---------:|----------------|------:|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | 74.52 | 76.08 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | +| FPN | R-101 | 512x1024 | 80000 | 3.9 | 10.29 | 75.80 | 77.40 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|---------:|----------------|------:|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | 37.49 | 39.09 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | +| 
FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | 39.35 | 40.72 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r101_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r101_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..d1d3c98e72ec64565f2f93e39ee3511a38469753 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r101_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r18_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r18_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..dd54a65db6ab93624e21affb6a1cc4600b652743 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r18_512x512_80k_ade20k.py @@ -0,0 +1,4 @@ +_base_ = './fpn_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + neck=dict(in_channels=[64, 128, 256, 512])) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r50_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r50_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7a6456acc756745f56e62430cc56854d198695e6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_r50_512x512_80k_ade20k.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', + '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py' +] +model = dict(decode_head=dict(num_classes=150)) + +gpu_factor = 2  # mmseg defaults to 4-GPU training; with 8 GPUs here, double the lr and halve the iterations (lr*2, iter/2) +# optimizer +optimizer = dict(type='SGD', lr=0.01*gpu_factor, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000//gpu_factor) +checkpoint_config = dict(by_epoch=False, interval=8000//gpu_factor) +evaluation = dict(interval=8000, metric='mIoU') + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101324d_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101324d_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..86db4a27fb469a4d4060f511ac5d51fcbe048fe9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101324d_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = './fpn_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101644d_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101644d_512x512_80k_ade20k.py new file mode 100644 
index 0000000000000000000000000000000000000000..6e5a798eb67a7080f94653b979134ce5213f4a34 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/sem_fpn/fpn_x101644d_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = './fpn_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d815510a19ade68c4962f04b8dee2c317f1788ce --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/README.md @@ -0,0 +1,50 @@ +# U-Net: Convolutional Networks for Biomedical Image Segmentation + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{ronneberger2015u, + title={U-net: Convolutional networks for biomedical image segmentation}, + author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, + booktitle={International Conference on Medical image computing and computer-assisted intervention}, + pages={234--241}, + year={2015}, + organization={Springer} +} +``` + +## Results and models + +### DRIVE + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | download | +|--------|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UNet-S5-D16 | FCN | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | 78.67 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-26cee593.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | 78.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | 78.69 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | + +### STARE + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | download | 
+|--------|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UNet-S5-D16 | FCN | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | 81.02 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-6ea7cfda.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | 81.22 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | 80.93 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | + +### CHASE_DB1 + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | download | +|--------|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UNet-S5-D16 | FCN | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | 80.24 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-95852f45.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | 80.36 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | 80.47 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | + +### HRF + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | download | +|--------|----------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UNet-S5-D16 | FCN | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | 79.45 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-df3ec8c4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | +| UNet-S5-D16 | PSPNet | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | 80.07 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | 80.21 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..c706cf3548e311a7930e5b58299e05af30c43d98 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..0ef02dcc491871f148b1ad038d281d250eb6e2f4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..118428bc44d3078517e231399b131db492f2bc7e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8862a0e89243d67634f37c3aca94ca98feff5c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc52d96293f214adf1e3e1878746ed8bd2434f6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/chase_db1.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..5d836c61dfd568dd4d29d876980001067dcaa200 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..be8eec77792f4eb16475dc5ab8607fb5682f0acf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 
+] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..80483ade4a4bc3dc5cb8805e8b74c100e872da0c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..b085a17d6bab5f4d33668bfcf232e30f2a9830fe --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..9d729cea699e1c845549c74b52703c9ee3273662 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..f57c9166b67a18fd74f474754b3baec6584b17cf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..7b5421ad6877e4b35b0a6ae6e516e577404547ce --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + 
'../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4d53a92f9bdb67ca9e4c3974ee368ca49d84619c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/README.md @@ -0,0 +1,48 @@ +# Unified Perceptual Parsing for Scene Understanding + +## Introduction + +[ALGORITHM] + +```latex +@inproceedings{xiao2018unified, + title={Unified perceptual parsing for scene understanding}, + author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={418--434}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | 77.10 | 78.37 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | +| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | 78.69 | 80.11 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | +| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | 77.98 | 79.70 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | +| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | 79.03 | 80.77 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | +| UPerNet | R-50 | 512x1024 | 80000 | - | - | 78.19 | 79.19 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | +| UPerNet | R-101 | 512x1024 | 80000 | - | - | 79.40 | 80.46 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | +| UPerNet | R-50 | 769x769 | 80000 | - | - | 79.39 | 80.92 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | +| UPerNet | R-101 | 769x769 | 80000 | - | - | 80.10 | 81.49 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | 40.70 | 41.81 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | +| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | 42.91 | 43.96 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | +| UPerNet | R-50 | 512x512 | 160000 | - | - | 42.05 | 42.78 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | +| UPerNet | R-101 | 512x512 | 160000 | - | - | 43.82 | 44.85 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | 
Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|---------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | 74.82 | 76.35 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | +| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | 77.10 | 78.29 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | +| UPerNet | R-50 | 512x512 | 40000 | - | - | 75.92 | 77.44 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | +| UPerNet | R-101 | 512x512 | 40000 | - | - | 77.43 | 78.56 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b90b597d831a664761d6051397d2b1862feb59c6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..420ca2e42836099213c1f91cb925088cfe7c1269 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_160k_ade20k.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..146f13eb79053cc69d4934d294aad9ba723b2577 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..56345d1806482ac822d709893fe6942f44be6f74 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..0669b741b9b3e3e1a309147b920d3d2a1952ab75 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_80k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..abfb9c5d9f35407d590cdc3325006b396ec52820 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e5f3a3fae18cb769fd04b0c669785c5728cf479f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a709165657d257df4fc76148d225261c63f88d8a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r101_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d621e89ce62c06424db7c2e5f5fd00a0a2e85a61 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..95fffcc76c2ff4f61f8dd80a00d35b7875262a50 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f5dd9aa4ed59d4939bcb49ffe129a9935e303201 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_20k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..95f5c09567144db47e07fc802b114bedd6a00725 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_40k_voc12aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..9621fd1f5c24e582b4a1eda18fcc0a13d2bcb953 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_80k_ade20k.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f561e309e3bddb439c90af930c4de5a0c7e209a7 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_40k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..89b18aa2840d12e67339ce0b7a0561fa2ba0c6fa --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_80k_cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..29af98f2ebe341998fcf93f8a5c018cabcc0c0ba --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/configs/upernet/upernet_r50_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/demo/image_demo.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/demo/image_demo.py new file mode 100644 index 0000000000000000000000000000000000000000..183f23871b7ff4607066b4e29727313e575b14e4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/demo/image_demo.py @@ -0,0 +1,29 @@ +from argparse import ArgumentParser + +from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot +from mmseg.core.evaluation import get_palette + + +def main(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--palette', + default='cityscapes', + help='Color palette used for segmentation map') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + model = init_segmentor(args.config, args.checkpoint, device=args.device) + # test a single image + result = inference_segmentor(model, args.img) + # show the results + show_result_pyplot(model, args.img, result, get_palette(args.palette)) + + +if __name__ == '__main__': + main() diff 
--git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docker/Dockerfile b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8e090f73a9e5b8aa09eee256e7876c8b4401f055 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docker/Dockerfile @@ -0,0 +1,22 @@ +ARG PYTORCH="1.6.0" +ARG CUDA="10.1" +ARG CUDNN="7" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install mmsegmentation +RUN conda clean --all + +RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html +RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation +WORKDIR /mmsegmentation +RUN pip install -r requirements/build.txt +RUN pip install --no-cache-dir -e . diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/Makefile b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/changelog.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/changelog.md new file mode 100644 index 0000000000000000000000000000000000000000..faf1df3d217d728ea84f8c4685722018ae523a72 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/changelog.md @@ -0,0 +1,157 @@ +## Changelog + +### V0.11 (02/02/2021) + +**Highlights** + +- Support memory efficient test, add more UNet models. + +**Bug Fixes** + +- Fixed TTA resize scale ([#334](https://github.com/open-mmlab/mmsegmentation/pull/334)) +- Fixed CI for pip 20.3 ([#307](https://github.com/open-mmlab/mmsegmentation/pull/307)) +- Fixed ADE20k test ([#359](https://github.com/open-mmlab/mmsegmentation/pull/359)) + +**New Features** + +- Support memory efficient test ([#330](https://github.com/open-mmlab/mmsegmentation/pull/330)) +- Add more UNet benchmarks ([#324](https://github.com/open-mmlab/mmsegmentation/pull/324)) +- Support Lovasz Loss ([#351](https://github.com/open-mmlab/mmsegmentation/pull/351)) + +**Improvements** + +- Move train_cfg/test_cfg inside model ([#341](https://github.com/open-mmlab/mmsegmentation/pull/341)) + +### V0.10 (01/01/2021) + +**Highlights** + +- Support MobileNetV3, DMNet, APCNet. Add models of ResNet18V1b, ResNet18V1c, ResNet50V1b. 
+ +**Bug Fixes** + +- Fixed CPU TTA ([#276](https://github.com/open-mmlab/mmsegmentation/pull/276)) +- Fixed CI for pip 20.3 ([#307](https://github.com/open-mmlab/mmsegmentation/pull/307)) + +**New Features** + +- Add ResNet18V1b, ResNet18V1c, ResNet50V1b, ResNet101V1b models ([#316](https://github.com/open-mmlab/mmsegmentation/pull/316)) +- Support MobileNetV3 ([#268](https://github.com/open-mmlab/mmsegmentation/pull/268)) +- Add 4 retinal vessel segmentation benchmarks ([#315](https://github.com/open-mmlab/mmsegmentation/pull/315)) +- Support DMNet ([#313](https://github.com/open-mmlab/mmsegmentation/pull/313)) +- Support APCNet ([#299](https://github.com/open-mmlab/mmsegmentation/pull/299)) + +**Improvements** + +- Refactor Documentation page ([#311](https://github.com/open-mmlab/mmsegmentation/pull/311)) +- Support resize data augmentation according to original image size ([#291](https://github.com/open-mmlab/mmsegmentation/pull/291)) + +### V0.9 (30/11/2020) + +**Highlights** + +- Support 4 medical datasets, UNet and CGNet. + +**New Features** + +- Support RandomRotate transform ([#215](https://github.com/open-mmlab/mmsegmentation/pull/215), [#260](https://github.com/open-mmlab/mmsegmentation/pull/260)) +- Support RGB2Gray transform ([#227](https://github.com/open-mmlab/mmsegmentation/pull/227)) +- Support Rerange transform ([#228](https://github.com/open-mmlab/mmsegmentation/pull/228)) +- Support ignore_index for BCE loss ([#210](https://github.com/open-mmlab/mmsegmentation/pull/210)) +- Add modelzoo statistics ([#263](https://github.com/open-mmlab/mmsegmentation/pull/263)) +- Support Dice evaluation metric ([#225](https://github.com/open-mmlab/mmsegmentation/pull/225)) +- Support Adjust Gamma transform ([#232](https://github.com/open-mmlab/mmsegmentation/pull/232)) +- Support CLAHE transform ([#229](https://github.com/open-mmlab/mmsegmentation/pull/229)) + +**Bug Fixes** + +- Fixed detail API link ([#267](https://github.com/open-mmlab/mmsegmentation/pull/267)) + +### V0.8 (03/11/2020) + +**Highlights** + +- Support 4 medical datasets, UNet and CGNet. + +**New Features** + +- Support customizing runner ([#118](https://github.com/open-mmlab/mmsegmentation/pull/118)) +- Support UNet ([#161](https://github.com/open-mmlab/mmsegmentation/pull/162)) +- Support CHASE_DB1, DRIVE, STARE, HRF ([#203](https://github.com/open-mmlab/mmsegmentation/pull/203)) +- Support CGNet ([#223](https://github.com/open-mmlab/mmsegmentation/pull/223)) + +### V0.7 (07/10/2020) + +**Highlights** + +- Support Pascal Context dataset and customizing dataset classes. 
+ +**Bug Fixes** + +- Fixed CPU inference ([#153](https://github.com/open-mmlab/mmsegmentation/pull/153)) + +**New Features** + +- Add DeepLab OS16 models ([#154](https://github.com/open-mmlab/mmsegmentation/pull/154)) +- Support Pascal Context dataset ([#133](https://github.com/open-mmlab/mmsegmentation/pull/133)) +- Support customizing dataset classes ([#71](https://github.com/open-mmlab/mmsegmentation/pull/71)) +- Support customizing dataset palette ([#157](https://github.com/open-mmlab/mmsegmentation/pull/157)) + +**Improvements** + +- Support 4D tensor output in ONNX ([#150](https://github.com/open-mmlab/mmsegmentation/pull/150)) +- Remove redundancies in ONNX export ([#160](https://github.com/open-mmlab/mmsegmentation/pull/160)) +- Migrate to MMCV DepthwiseSeparableConv ([#158](https://github.com/open-mmlab/mmsegmentation/pull/158)) +- Migrate to MMCV collect_env ([#137](https://github.com/open-mmlab/mmsegmentation/pull/137)) +- Use img_prefix and seg_prefix for loading ([#153](https://github.com/open-mmlab/mmsegmentation/pull/153)) + +### V0.6 (10/09/2020) + +**Highlights** + +- Support new methods, i.e. MobileNetV2, EMANet, DNL, PointRend, Semantic FPN, Fast-SCNN and ResNeSt. + +**Bug Fixes** + +- Fixed sliding inference ONNX export ([#90](https://github.com/open-mmlab/mmsegmentation/pull/90)) + +**New Features** + +- Support MobileNet v2 ([#86](https://github.com/open-mmlab/mmsegmentation/pull/86)) +- Support EMANet ([#34](https://github.com/open-mmlab/mmsegmentation/pull/34)) +- Support DNL ([#37](https://github.com/open-mmlab/mmsegmentation/pull/37)) +- Support PointRend ([#109](https://github.com/open-mmlab/mmsegmentation/pull/109)) +- Support Semantic FPN ([#94](https://github.com/open-mmlab/mmsegmentation/pull/94)) +- Support Fast-SCNN ([#58](https://github.com/open-mmlab/mmsegmentation/pull/58)) +- Support ResNeSt backbone ([#47](https://github.com/open-mmlab/mmsegmentation/pull/47)) +- Support ONNX export (experimental) ([#12](https://github.com/open-mmlab/mmsegmentation/pull/12)) + +**Improvements** + +- Support Upsample in ONNX ([#100](https://github.com/open-mmlab/mmsegmentation/pull/100)) +- Support Windows install (experimental) ([#75](https://github.com/open-mmlab/mmsegmentation/pull/75)) +- Add more OCRNet results ([#20](https://github.com/open-mmlab/mmsegmentation/pull/20)) +- Add PyTorch 1.6 CI ([#64](https://github.com/open-mmlab/mmsegmentation/pull/64)) +- Get version and githash automatically ([#55](https://github.com/open-mmlab/mmsegmentation/pull/55)) + +### v0.5.1 (11/08/2020) + +**Highlights** + +- Support FP16 and more generalized OHEM + +**Bug Fixes** + +- Fixed Pascal VOC conversion script (#19) +- Fixed OHEM weight assign bug (#54) +- Fixed palette type when palette is not given (#27) + +**New Features** + +- Support FP16 (#21) +- Generalized OHEM (#54) + +**Improvements** + +- Add load-from flag (#33) +- Fixed the training tricks doc about setting different learning rates for parts of the model (#26) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/conf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..f472acb30abdbcf5191926a8d89f478c1210744c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/conf.py @@ -0,0 +1,88 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import subprocess +import sys + +sys.path.insert(0, os.path.abspath('..')) + +# -- Project information ----------------------------------------------------- + +project = 'MMSegmentation' +copyright = '2020-2020, OpenMMLab' +author = 'MMSegmentation Authors' +version_file = '../mmseg/version.py' + + +def get_version(): + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +# The full version, including alpha/beta/rc tags +release = get_version() + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'recommonmark', + 'sphinx_markdown_tables', +] + +autodoc_mock_imports = ['matplotlib', 'pycocotools', 'mmseg.version'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# The master toctree document. +master_doc = 'index' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +def builder_inited_handler(app): + subprocess.run(['./stat.py']) + + +def setup(app): + app.connect('builder-inited', builder_inited_handler) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/dataset_prepare.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/dataset_prepare.md new file mode 100644 index 0000000000000000000000000000000000000000..5407339f13909c3bf32556dc273076d8bb351ba6 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/dataset_prepare.md @@ -0,0 +1,165 @@ +## Prepare datasets + +It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`. +If your folder structure is different, you may need to change the corresponding paths in config files. 
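For example, assuming the datasets actually live under `/data/openmmlab` (a hypothetical path used only for illustration), the symlink could be created as in the sketch below; this is not a command shipped with the repo, just one way to set up the recommended layout.

```shell
# Hypothetical example: expose an external dataset root inside the repo.
# Replace /data/openmmlab with wherever your datasets are actually stored.
cd $MMSEGMENTATION
ln -s /data/openmmlab data
```

With the symlink (or a real `data` directory) in place, the expected layout is: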
+
+```none
+mmsegmentation
+├── mmseg
+├── tools
+├── configs
+├── data
+│ ├── cityscapes
+│ │ ├── leftImg8bit
+│ │ │ ├── train
+│ │ │ ├── val
+│ │ ├── gtFine
+│ │ │ ├── train
+│ │ │ ├── val
+│ ├── VOCdevkit
+│ │ ├── VOC2012
+│ │ │ ├── JPEGImages
+│ │ │ ├── SegmentationClass
+│ │ │ ├── ImageSets
+│ │ │ │ ├── Segmentation
+│ │ ├── VOC2010
+│ │ │ ├── JPEGImages
+│ │ │ ├── SegmentationClassContext
+│ │ │ ├── ImageSets
+│ │ │ │ ├── SegmentationContext
+│ │ │ │ │ ├── train.txt
+│ │ │ │ │ ├── val.txt
+│ │ │ ├── trainval_merged.json
+│ │ ├── VOCaug
+│ │ │ ├── dataset
+│ │ │ │ ├── cls
+│ ├── ade
+│ │ ├── ADEChallengeData2016
+│ │ │ ├── annotations
+│ │ │ │ ├── training
+│ │ │ │ ├── validation
+│ │ │ ├── images
+│ │ │ │ ├── training
+│ │ │ │ ├── validation
+│ ├── CHASE_DB1
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── DRIVE
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── HRF
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── STARE
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+
+```
+
+### Cityscapes
+
+The data can be found [here](https://www.cityscapes-dataset.com/downloads/) after registration.
+
+By convention, `**labelTrainIds.png` are used for Cityscapes training.
+We provide a [script](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
+to generate `**labelTrainIds.png`.
+
+```shell
+# --nproc 8 means using 8 processes for conversion; it can be omitted.
+python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8
+```
+
+### Pascal VOC
+
+Pascal VOC 2012 can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
+Besides, most recent works on the Pascal VOC dataset usually exploit extra augmentation data, which can be found [here](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz).
+
+If you would like to use the augmented VOC dataset, please run the following command to convert the augmentation annotations into the proper format.
+
+```shell
+# --nproc 8 means using 8 processes for conversion; it can be omitted.
+python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8
+```
+
+Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/tutorials/new_dataset.md#concatenate-dataset) for details about how to concatenate them and train them together.
+
+### ADE20K
+
+The training and validation set of ADE20K can be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
+The test set may also be downloaded from [here](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip).
+
+### Pascal Context
+
+The training and validation set of Pascal Context can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.
+
+To split the training and validation set from the original dataset, you may download trainval_merged.json from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json).
+
+If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/zhanghang1989/detail-api) and then run the following command to convert the annotations into the proper format.
+
+```shell
+python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
+```
+
+### CHASE DB1
+
+The training and validation set of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).
+
+To convert the CHASE DB1 dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
+```
+
+The script will create the directory structure automatically.
+
+### DRIVE
+
+The training and validation set of DRIVE can be downloaded from [here](https://drive.grand-challenge.org/). Before that, you should register an account. Currently '1st_manual' is not provided officially.
+
+To convert the DRIVE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
+```
+
+The script will create the directory structure automatically.
+
+### HRF
+
+First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).
+
+To convert the HRF dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
+```
+
+The script will create the directory structure automatically.
+
+### STARE
+
+First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).
+
+To convert the STARE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
+```
+
+The script will create the directory structure automatically.
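+
+The medical-image conversion scripts above (CHASE DB1, DRIVE, HRF, STARE) all produce the `images`/`annotations` x `training`/`validation` layout shown at the top of this page. The snippet below is a small hypothetical sanity check (not part of the toolbox) that you may run after conversion; the dataset root passed to it is just an example.
+
+```python
+import os
+import os.path as osp
+
+
+def check_dataset(root, splits=('training', 'validation')):
+    """Print the file count of each images/annotations split directory."""
+    for sub in ('images', 'annotations'):
+        for split in splits:
+            path = osp.join(root, sub, split)
+            assert osp.isdir(path), f'missing directory: {path}'
+            print(f'{path}: {len(os.listdir(path))} files')
+
+
+check_dataset('data/STARE')
+```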
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/get_started.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/get_started.md
new file mode 100644
index 0000000000000000000000000000000000000000..3182c53451dae3024a4e99aace1c766856773d66
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/get_started.md
@@ -0,0 +1,193 @@
+## Prerequisites
+
+- Linux or macOS (Windows support is experimental)
+- Python 3.6+
+- PyTorch 1.3+
+- CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible)
+- GCC 5+
+- [MMCV](https://mmcv.readthedocs.io/en/latest/#installation)
+
+Note: You need to run `pip uninstall mmcv` first if you have mmcv installed.
+If mmcv and mmcv-full are both installed, there will be a `ModuleNotFoundError`.
+
+## Installation
+
+a. Create a conda virtual environment and activate it.
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+```
+
+b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).
+Here we use PyTorch 1.6.0 and CUDA 10.1.
+You may also switch to another version by specifying the version number.
+
+```shell
+conda install pytorch=1.6.0 torchvision cudatoolkit=10.1 -c pytorch
+```
+
+c. Install [MMCV](https://mmcv.readthedocs.io/en/latest/) following the [official instructions](https://mmcv.readthedocs.io/en/latest/#installation).
+Either `mmcv` or `mmcv-full` is compatible with MMSegmentation, but for methods like CCNet and PSANet, the CUDA ops in `mmcv-full` are required.
+
+**Install mmcv for Linux:**
+
+The pre-built mmcv-full (with PyTorch 1.5 and CUDA 10.1) can be installed by running: (other available versions can be found [here](https://mmcv.readthedocs.io/en/latest/#install-with-pip))
+
+```shell
+pip install mmcv-full==latest+torch1.5.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html
+```
+
+**Install mmcv for Windows (Experimental):**
+
+For Windows, the installation of MMCV requires a native C++ compiler, such as cl.exe. Please add the compiler to %PATH%.
+
+A typical path for cl.exe looks like the following if you have Windows SDK and Visual Studio installed on your computer:
+
+```shell
+C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Tools\MSVC\14.26.28801\bin\Hostx86\x64
+```
+
+Otherwise, you can download the cl compiler from the web and then set up its path.
+
+Then, clone mmcv from GitHub and install it via pip:
+
+```shell
+git clone https://github.com/open-mmlab/mmcv.git
+cd mmcv
+pip install -e .
+```
+
+Or simply:
+
+```shell
+pip install mmcv
+```
+
+Currently, mmcv-full is not supported on Windows.
+
+d. Install MMSegmentation.
+
+```shell
+pip install mmsegmentation # install the latest release
+```
+
+or
+
+```shell
+pip install git+https://github.com/open-mmlab/mmsegmentation.git # install the master branch
+```
+
+Alternatively, if you would like to install MMSegmentation in `dev` mode, run the following:
+
+```shell
+git clone https://github.com/open-mmlab/mmsegmentation.git
+cd mmsegmentation
+pip install -e .  # or "python setup.py develop"
+```
+
+Note:
+
+1. When training or testing models on Windows, please ensure that all the '\\' in paths are replaced with '/'. Add .replace('\\', '/') to your python code wherever path strings occur.
+2. The `version+git_hash` will also be saved in the trained model's meta, e.g. 0.5.0+c415a2e.
+3. When MMSegmentation is installed in `dev` mode, any local modifications made to the code will take effect without the need to reinstall it.
+4. If you would like to use `opencv-python-headless` instead of `opencv-python`,
+   you can install it before installing MMCV.
+5. Some dependencies are optional. Simply running `pip install -e .` will only install the minimum runtime requirements.
+   To use optional dependencies like `cityscapesscripts` either install them manually with `pip install -r requirements/optional.txt` or specify desired extras when calling `pip` (e.g. `pip install -e .[optional]`). Valid keys for the extras field are: `all`, `tests`, `build`, and `optional`.
+
+### A from-scratch setup script
+
+#### Linux
+
+Here is a full script for setting up mmsegmentation with conda and linking the dataset path (supposing that your dataset path is $DATA_ROOT).
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+
+conda install pytorch=1.6.0 torchvision cudatoolkit=10.1 -c pytorch
+pip install mmcv-full==latest+torch1.5.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html
+git clone https://github.com/open-mmlab/mmsegmentation.git
+cd mmsegmentation
+pip install -e .  # or "python setup.py develop"
+
+mkdir data
+ln -s $DATA_ROOT data
+```
+
+#### Windows (Experimental)
+
+Here is a full script for setting up mmsegmentation with conda and linking the dataset path (supposing that your dataset path is
+%DATA_ROOT%. Notice: It must be an absolute path).
+
+```shell
+conda create -n open-mmlab python=3.7 -y
+conda activate open-mmlab
+
+conda install pytorch=1.6.0 torchvision cudatoolkit=10.1 -c pytorch
+set PATH=full\path\to\your\cpp\compiler;%PATH%
+pip install mmcv
+
+git clone https://github.com/open-mmlab/mmsegmentation.git
+cd mmsegmentation
+pip install -e .  # or "python setup.py develop"
+
+mklink /D data %DATA_ROOT%
+```
+
+#### Developing with multiple MMSegmentation versions
+
+The train and test scripts already modify the `PYTHONPATH` to ensure the scripts use the MMSegmentation in the current directory.
+
+To use the default MMSegmentation installed in the environment rather than the one you are working with, you can remove the following line in those scripts
+
+```shell
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
+```
+
+## Verification
+
+To verify whether MMSegmentation and the required environment are installed correctly, we can run the sample Python code below to initialize a segmentor and run inference on a demo image:
+
+```python
+from mmseg.apis import inference_segmentor, init_segmentor
+import mmcv
+
+config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
+checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+# build the model from a config file and a checkpoint file
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+
+# test a single image and show the results
+img = 'test.jpg'  # or img = mmcv.imread(img), which will only load it once
+result = inference_segmentor(model, img)
+# visualize the results in a new window
+model.show_result(img, result, show=True)
+# or save the visualization results to image files
+model.show_result(img, result, out_file='result.jpg')
+
+# test a video and show the results
+video = mmcv.VideoReader('video.mp4')
+for frame in video:
+   result = inference_segmentor(model, frame)
+   model.show_result(frame, result, wait_time=1)
+```
+
+The above code is supposed to run successfully once you finish the installation.
+
+We also provide a demo script to test a single image.
+
+```shell
+python demo/image_demo.py ${IMAGE_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${DEVICE_NAME}] [--palette ${PALETTE}]
+```
+
+Examples:
+
+```shell
+python demo/image_demo.py demo/demo.jpg configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+    checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth --device cuda:0 --palette cityscapes
+```
+
+A notebook demo can be found in [demo/inference_demo.ipynb](../demo/inference_demo.ipynb).
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/inference.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/inference.md
new file mode 100644
index 0000000000000000000000000000000000000000..d7bc21b65acb9da4a38ade92be6ccf3c56574982
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/inference.md
@@ -0,0 +1,101 @@
+## Inference with pretrained models
+
+We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
+and also some high-level APIs for easier integration into other projects.
+
+### Test a dataset
+
+- single GPU
+- single node multiple GPUs
+- multiple nodes
+
+You can use the following commands to test a dataset.
+
+```shell
+# single-gpu testing
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
+
+# multi-gpu testing
+./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
+```
+
+Optional arguments:
+
+- `RESULT_FILE`: Filename of the output results in pickle format. If not specified, the results will not be saved to a file.
+- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values depend on the dataset, e.g., `mIoU` is available for all datasets. Cityscapes can be evaluated with the `cityscapes` metric as well as the standard `mIoU` metric.
+- `--show`: If specified, segmentation results will be plotted on the images and shown in a new window. It is only applicable to single GPU testing and used for debugging and visualization. Please make sure that a GUI is available in your environment, otherwise you may encounter an error like `cannot connect to X server`.
+- `--show-dir`: If specified, segmentation results will be plotted on the images and saved to the specified directory. It is only applicable to single GPU testing and used for debugging and visualization. You do NOT need a GUI available in your environment for using this option.
+- `--eval-options`: Optional parameters during evaluation. When `efficient_test=True`, it will save intermediate results to local files to save CPU memory. Make sure that you have enough local storage space (more than 20GB).
+
+Examples:
+
+Assume that you have already downloaded the checkpoints to the directory `checkpoints/`.
+
+1. Test PSPNet and visualize the results. Press any key for the next image.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       --show
+   ```
+
+2. Test PSPNet and save the painted images for later visualization.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       --show-dir psp_r50_512x1024_40ki_cityscapes_results
+   ```
+
+3. Test PSPNet on PASCAL VOC (without saving the test results) and evaluate the mIoU.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \
+       checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \
+       --eval mIoU
+   ```
+
+4. Test PSPNet with 4 GPUs, and evaluate the standard mIoU and cityscapes metric.
+
+   ```shell
+   ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       4 --out results.pkl --eval mIoU cityscapes
+   ```
+
+   Note: There is some gap (~0.1%) between the cityscapes mIoU and our mIoU. The reason is that the cityscapes metric averages each class weighted by class size by default,
+   while we use the simple unweighted version for all datasets.
+
+5. Test PSPNet on the cityscapes test split with 4 GPUs, and generate the png files to be submitted to the official evaluation server.
+
+   First, add the following to the config file `configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py`,
+
+   ```python
+   data = dict(
+       test=dict(
+           img_dir='leftImg8bit/test',
+           ann_dir='gtFine/test'))
+   ```
+
+   Then run the test.
+
+   ```shell
+   ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       4 --format-only --eval-options "imgfile_prefix=./pspnet_test_results"
+   ```
+
+   You will get png files under the `./pspnet_test_results` directory.
+   You may run `zip -r results.zip pspnet_test_results/` and submit the zip file to the [evaluation server](https://www.cityscapes-dataset.com/submit/).
+
+6. Run a CPU-memory-efficient test of DeepLabV3+ on Cityscapes (without saving the test results) and evaluate the mIoU.
+
+   ```shell
+   python tools/test.py \
+       configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py \
+       deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \
+       --eval-options efficient_test=True \
+       --eval mIoU
+   ```
+
+   Checking the CPU memory footprint with `pmap`, the test used 2.25GB of CPU memory with `efficient_test=True` and 11.06GB with `efficient_test=False`. This optional parameter can save a lot of memory.
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/make.bat b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..922152e96a04a242e6fc40f124261d74890617d8
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/model_zoo.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/model_zoo.md
new file mode 100644
index 0000000000000000000000000000000000000000..2d4c1c2ac999c771e7048c36bd94d316457f0e50
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/model_zoo.md
@@ -0,0 +1,163 @@
+# Benchmark and Model Zoo
+
+## Common settings
+
+* We use distributed training with 4 GPUs by default.
+* All pytorch-style pretrained backbones on ImageNet are trained by ourselves, following the same procedure as in the [paper](https://arxiv.org/pdf/1812.01187.pdf).
+  Our ResNet-style backbones are based on the ResNetV1c variant, where the 7x7 conv in the input stem is replaced with three 3x3 convs.
+* For consistency across different hardware, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` for all 4 GPUs with `torch.backends.cudnn.benchmark=False`.
+  Note that this value is usually less than what `nvidia-smi` shows.
+* We report the inference time as the total time of network forwarding and post-processing, excluding the data loading time.
+  Results are obtained with the script `tools/benchmark.py` which computes the average time on 200 images with `torch.backends.cudnn.benchmark=False`.
+* There are two inference modes in this framework.
+
+  * `slide` mode: The `test_cfg` will be like `dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`.
+
+    In this mode, multiple patches will be cropped from the input image and passed into the network individually.
+    The crop size and stride between patches are specified by `crop_size` and `stride`.
+    Overlapping areas are merged by averaging.
+
+  * `whole` mode: The `test_cfg` will be like `dict(mode='whole')`.
+
+    In this mode, the whole image will be passed into the network directly.
+
+  By default, we use `slide` inference for models trained on 769x769 inputs and `whole` inference for the rest.
+* For an input size of 8x+1 (e.g. 769), `align_corners=True` is adopted as a traditional practice.
+  Otherwise, for an input size of 8x (e.g. 512, 1024), `align_corners=False` is adopted.
+
+## Baselines
+
+### FCN
+
+Please refer to [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) for details.
+
+### PSPNet
+
+Please refer to [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) for details.
+
+### DeepLabV3
+
+Please refer to [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) for details.
+
+### PSANet
+
+Please refer to [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) for details.
+
+### DeepLabV3+
+
+Please refer to [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) for details.
+
+### UPerNet
+
+Please refer to [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) for details.
+
+### NonLocal Net
+
+Please refer to [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nlnet) for details.
+
+### EncNet
+
+Please refer to [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) for details.
+ +### CCNet + +Please refer to [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) for details. + +### DANet + +Please refer to [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) for details. + +### APCNet + +Please refer to [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) for details. + +### HRNet + +Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) for details. + +### GCNet + +Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) for details. + +### DMNet + +Please refer to [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) for details. + +### ANN + +Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) for details. + +### OCRNet + +Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details. + +### Fast-SCNN + +Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) for details. + +### ResNeSt + +Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details. + +### Semantic FPN + +Please refer to [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/semfpn) for details. + +### PointRend + +Please refer to [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) for details. + +### MobileNetV2 + +Please refer to [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) for details. + +### MobileNetV3 + +Please refer to [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) for details. + +### EMANet + +Please refer to [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) for details. + +### DNLNet + +Please refer to [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) for details. + +### CGNet + +Please refer to [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) for details. + +### Mixed Precision (FP16) Training + +Please refer [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details. + +## Speed benchmark + +### Hardware + +* 8 NVIDIA Tesla V100 (32G) GPUs +* Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz + +### Software environment + +* Python 3.7 +* PyTorch 1.5 +* CUDA 10.1 +* CUDNN 7.6.03 +* NCCL 2.4.08 + +### Training speed + +For fair comparison, we benchmark all implementations with ResNet-101V1c. +The input size is fixed to 1024x512 with batch size 2. + +The training speed is reported as followed, in terms of second per iter (s/iter). The lower, the better. + +| Implementation | PSPNet (s/iter) | DeepLabV3+ (s/iter) | +|----------------|-----------------|---------------------| +| [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) | **0.83** | **0.85** | +| [SegmenTron](https://github.com/LikeLy-Journey/SegmenTron) | 0.84 | 0.85 | +| [CASILVision](https://github.com/CSAILVision/semantic-segmentation-pytorch) | 1.15 | N/A | +| [vedaseg](https://github.com/Media-Smart/vedaseg) | 0.95 | 1.25 | + +Note: The output stride of DeepLabV3+ is 8. 
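+
+As a hedged sketch of the memory-reporting convention from the common settings above (the model and input are placeholders, and `torch.cuda.reset_peak_memory_stats` assumes PyTorch 1.4+; the published numbers come from the actual training and benchmark scripts):
+
+```python
+import torch
+
+torch.backends.cudnn.benchmark = False  # matches the reporting convention above
+
+
+def peak_memory_mb(model, inputs):
+    """Peak allocated GPU memory (MB) for one forward pass."""
+    torch.cuda.reset_peak_memory_stats()
+    with torch.no_grad():
+        model(inputs)
+    return torch.cuda.max_memory_allocated() / 1024**2
+```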
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/stat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/stat.py new file mode 100644 index 0000000000000000000000000000000000000000..3aaf0607004e1aa9d92001da78521afe51057034 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/stat.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +import functools as func +import glob +import os.path as osp +import re + +import numpy as np + +url_prefix = 'https://github.com/open-mmlab/mmsegmentation/blob/master/' + +files = sorted(glob.glob('../configs/*/README.md')) + +stats = [] +titles = [] +num_ckpts = 0 + +for f in files: + url = osp.dirname(f.replace('../', url_prefix)) + + with open(f, 'r') as content_file: + content = content_file.read() + + title = content.split('\n')[0].replace('#', '').strip() + ckpts = set(x.lower().strip() + for x in re.findall(r'https?://download.*\.pth', content) + if 'mmsegmentation' in x) + if len(ckpts) == 0: + continue + + _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)] + assert len(_papertype) > 0 + papertype = _papertype[0] + + paper = set([(papertype, title)]) + + titles.append(title) + num_ckpts += len(ckpts) + statsmsg = f""" +\t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts) +""" + stats.append((paper, ckpts, statsmsg)) + +allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) +msglist = '\n'.join(x for _, _, x in stats) + +papertypes, papercounts = np.unique([t for t, _ in allpapers], + return_counts=True) +countstr = '\n'.join( + [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) + +modelzoo = f""" +# Model Zoo Statistics + +* Number of papers: {len(set(titles))} +{countstr} + +* Number of checkpoints: {num_ckpts} +{msglist} +""" + +with open('modelzoo_statistics.md', 'w') as f: + f.write(modelzoo) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/train.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/train.md new file mode 100644 index 0000000000000000000000000000000000000000..1deac95f7d18185ff0586f81095587471eac46c4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/train.md @@ -0,0 +1,83 @@ +## Train a model + +MMSegmentation implements distributed training and non-distributed training, +which uses `MMDistributedDataParallel` and `MMDataParallel` respectively. + +All outputs (log files and checkpoints) will be saved to the working directory, +which is specified by `work_dir` in the config file. + +By default we evaluate the model on the validation set after some iterations, you can change the evaluation interval by adding the interval argument in the training config. + +```python +evaluation = dict(interval=4000) # This evaluate the model per 4000 iterations. +``` + +**\*Important\***: The default learning rate in config files is for 4 GPUs and 2 img/gpu (batch size = 4x2 = 8). +Equivalently, you may also use 8 GPUs and 1 imgs/gpu since all models using cross-GPU SyncBN. + +To trade speed with GPU memory, you may pass in `--options model.backbone.with_cp=True` to enable checkpoint in backbone. + +### Train with a single GPU + +```shell +python tools/train.py ${CONFIG_FILE} [optional arguments] +``` + +If you want to specify the working directory in the command, you can add an argument `--work-dir ${YOUR_WORK_DIR}`. 
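+
+For example (the working directory name here is illustrative):
+
+```shell
+python tools/train.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+    --work-dir work_dirs/psp_r50_512x1024_40ki_cityscapes
+```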
+
+### Train with multiple GPUs
+
+```shell
+./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
+```
+
+Optional arguments are:
+
+- `--no-validate` (**not suggested**): By default, the codebase will perform evaluation every k iterations during training. To disable this behavior, use `--no-validate`.
+- `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file.
+- `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file (to continue the training process).
+- `--load-from ${CHECKPOINT_FILE}`: Load weights from a checkpoint file (to start finetuning for another task).
+
+Difference between `resume-from` and `load-from`:
+
+- `resume-from` loads both the model weights and the optimizer state, including the iteration number.
+- `load-from` loads only the model weights and starts training from iteration 0.
+
+### Train with multiple machines
+
+If you run MMSegmentation on a cluster managed with [slurm](https://slurm.schedmd.com/), you can use the script `slurm_train.sh`. (This script also supports single machine training.)
+
+```shell
+[GPUS=${GPUS}] ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} --work-dir ${WORK_DIR}
+```
+
+Here is an example of using 16 GPUs to train PSPNet on the dev partition.
+
+```shell
+GPUS=16 ./tools/slurm_train.sh dev pspr50 configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py /nfs/xxxx/psp_r50_512x1024_40ki_cityscapes
+```
+
+You can check [slurm_train.sh](../tools/slurm_train.sh) for full arguments and environment variables.
+
+If you have multiple machines connected simply via Ethernet, you can refer to the
+PyTorch [launch utility](https://pytorch.org/docs/stable/distributed_deprecated.html#launch-utility).
+Training is usually slow if you do not have high-speed networking like InfiniBand.
+
+### Launch multiple jobs on a single machine
+
+If you launch multiple jobs on a single machine, e.g., 2 jobs of 4-GPU training on a machine with 8 GPUs,
+you need to specify different ports (29500 by default) for each job to avoid communication conflicts. Otherwise, there will be an error message saying `RuntimeError: Address already in use`.
+
+If you use `dist_train.sh` to launch training jobs, you can set the port in the commands with the environment variable `PORT`.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG_FILE} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG_FILE} 4
+```
+
+If you use `slurm_train.sh` to launch training jobs, you can set the port in the commands with the environment variable `MASTER_PORT`.
+
+```shell
+MASTER_PORT=29500 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE}
+MASTER_PORT=29501 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE}
+```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/config.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/config.md
new file mode 100644
index 0000000000000000000000000000000000000000..b243c06d5b60fc09135b227235241c6583325a6b
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/config.md
@@ -0,0 +1,381 @@
+# Tutorial 1: Learn about Configs
+
+We incorporate modular and inheritance design into our config system, which makes it convenient to conduct various experiments.
+If you wish to inspect the config file, you may run `python tools/print_config.py /PATH/TO/CONFIG` to see the complete config.
+You may also pass `--options xxx.yyy=zzz` to see the updated config.
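+
+For example, the following call (the override value is only illustrative) prints the complete PSPNet config with the learning rate changed:
+
+```shell
+python tools/print_config.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+    --options optimizer.lr=0.005
+```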
+
+## Config File Structure
+
+There are 4 basic component types under `config/_base_`: dataset, model, schedule, and default_runtime.
+Many methods, such as DeepLabV3 and PSPNet, can be easily constructed with one of each.
+The configs that are composed of components from `_base_` are called _primitive_.
+
+For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
+
+For easy understanding, we recommend contributors to inherit from existing methods.
+For example, if some modification is made based on DeepLabV3, users may first inherit the basic DeepLabV3 structure by specifying `_base_ = ../deeplabv3/deeplabv3_r50_512x1024_40ki_cityscapes.py`, then modify the necessary fields in the config files.
+
+If you are building an entirely new method that does not share the structure with any of the existing methods, you may create a folder `xxxnet` under `configs`.
+
+Please refer to [mmcv](https://mmcv.readthedocs.io/en/latest/utils.html#config) for detailed documentation.
+
+## Config Name Style
+
+We follow the style below to name config files. Contributors are advised to follow the same style.
+
+```
+{model}_{backbone}_[misc]_[gpu x batch_per_gpu]_{resolution}_{schedule}_{dataset}
+```
+
+`{xxx}` is a required field and `[yyy]` is optional.
+
+- `{model}`: model type like `psp`, `deeplabv3`, etc.
+- `{backbone}`: backbone type like `r50` (ResNet-50), `x101` (ResNeXt-101).
+- `[misc]`: miscellaneous settings/plugins of the model, e.g. `dconv`, `gcb`, `attention`, `mstrain`.
+- `[gpu x batch_per_gpu]`: GPUs and samples per GPU, `8x2` is used by default.
+- `{schedule}`: training schedule, `20ki` means 20k iterations.
+- `{dataset}`: dataset like `cityscapes`, `voc12aug`, `ade`.
+
+## An Example of PSPNet
+
+To help the users have a basic idea of a complete config and the modules in a modern semantic segmentation system,
+we make brief comments on the config of PSPNet using ResNet50V1c as follows.
+For more detailed usage and the corresponding alternatives for each module, please refer to the API documentation.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # Segmentation usually uses SyncBN
+model = dict(
+    type='EncoderDecoder',  # Name of segmentor
+    pretrained='open-mmlab://resnet50_v1c',  # The ImageNet pretrained backbone to be loaded
+    backbone=dict(
+        type='ResNetV1c',  # The type of backbone. Please refer to mmseg/backbone/resnet.py for details.
+        depth=50,  # Depth of backbone. Normally 50, 101 are used.
+        num_stages=4,  # Number of stages of backbone.
+        out_indices=(0, 1, 2, 3),  # The index of output feature maps produced in each stage.
+        dilations=(1, 1, 2, 4),  # The dilation rate of each layer.
+        strides=(1, 2, 1, 1),  # The stride of each layer.
+        norm_cfg=dict(  # The configuration of norm layer.
+            type='SyncBN',  # Type of norm layer. Usually it is SyncBN.
+            requires_grad=True),  # Whether to train the gamma and beta in norm
+        norm_eval=False,  # Whether to freeze the statistics in BN
+        style='pytorch',  # The style of backbone, 'pytorch' means that stride 2 layers are in 3x3 conv, 'caffe' means stride 2 layers are in 1x1 convs.
+        contract_dilation=True),  # When dilation > 1, whether to contract the first layer of dilation.
+    decode_head=dict(
+        type='PSPHead',  # Type of decode head. Please refer to mmseg/models/decode_heads for available options.
+        in_channels=2048,  # Input channel of decode head.
+        in_index=3,  # The index of feature map to select.
+        channels=512,  # The intermediate channels of decode head.
+        pool_scales=(1, 2, 3, 6),  # The avg pooling scales of PSPHead. Please refer to paper for details.
+        dropout_ratio=0.1,  # The dropout ratio before final classification layer.
+        num_classes=19,  # Number of segmentation classes. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k.
+        norm_cfg=dict(type='SyncBN', requires_grad=True),  # The configuration of norm layer.
+        align_corners=False,  # The align_corners argument for resize in decoding.
+        loss_decode=dict(  # Config of loss function for the decode_head.
+            type='CrossEntropyLoss',  # Type of loss used for segmentation.
+            use_sigmoid=False,  # Whether to use sigmoid activation for segmentation.
+            loss_weight=1.0)),  # Loss weight of decode head.
+    auxiliary_head=dict(
+        type='FCNHead',  # Type of auxiliary head. Please refer to mmseg/models/decode_heads for available options.
+        in_channels=1024,  # Input channel of auxiliary head.
+        in_index=2,  # The index of feature map to select.
+        channels=256,  # The intermediate channels of decode head.
+        num_convs=1,  # Number of convs in FCNHead. It is usually 1 in auxiliary head.
+        concat_input=False,  # Whether to concat output of convs with input before classification layer.
+        dropout_ratio=0.1,  # The dropout ratio before final classification layer.
+        num_classes=19,  # Number of segmentation classes. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k.
+        norm_cfg=dict(type='SyncBN', requires_grad=True),  # The configuration of norm layer.
+        align_corners=False,  # The align_corners argument for resize in decoding.
+        loss_decode=dict(  # Config of loss function for the decode_head.
+            type='CrossEntropyLoss',  # Type of loss used for segmentation.
+            use_sigmoid=False,  # Whether to use sigmoid activation for segmentation.
+            loss_weight=0.4)))  # Loss weight of auxiliary head, which is usually 0.4 of decode head.
+train_cfg = dict()  # train_cfg is just a placeholder for now.
+test_cfg = dict(mode='whole')  # The test mode, options are 'whole' and 'slide'. 'whole': whole image fully-convolutional test. 'slide': sliding crop window on the image.
+dataset_type = 'CityscapesDataset'  # Dataset type, this will be used to define the dataset.
+data_root = 'data/cityscapes/'  # Root path of data.
+img_norm_cfg = dict(  # Image normalization config to normalize the input images.
+    mean=[123.675, 116.28, 103.53],  # Mean values used when pre-training the pre-trained backbone models.
+    std=[58.395, 57.12, 57.375],  # Standard deviation used when pre-training the pre-trained backbone models.
+    to_rgb=True)  # The channel order of images used when pre-training the pre-trained backbone models.
+crop_size = (512, 1024)  # The crop size during training.
+train_pipeline = [  # Training pipeline.
+    dict(type='LoadImageFromFile'),  # First pipeline to load images from file path.
+    dict(type='LoadAnnotations'),  # Second pipeline to load annotations for current image.
+    dict(type='Resize',  # Augmentation pipeline that resizes the images and their annotations.
+        img_scale=(2048, 1024),  # The largest scale of image.
+        ratio_range=(0.5, 2.0)),  # The augmented scale range as ratio.
+    dict(type='RandomCrop',  # Augmentation pipeline that randomly crops a patch from the current image.
+        crop_size=(512, 1024),  # The crop size of patch.
+        cat_max_ratio=0.75),  # The max area ratio that could be occupied by single category.
+    dict(
+        type='RandomFlip',  # Augmentation pipeline that flips the images and their annotations
+        flip_ratio=0.5),  # The ratio or probability to flip
+    dict(type='PhotoMetricDistortion'),  # Augmentation pipeline that distorts the current image with several photometric methods.
+    dict(
+        type='Normalize',  # Augmentation pipeline that normalizes the input images
+        mean=[123.675, 116.28, 103.53],  # These keys are the same as in img_norm_cfg since the
+        std=[58.395, 57.12, 57.375],  # keys of img_norm_cfg are used here as arguments
+        to_rgb=True),
+    dict(type='Pad',  # Augmentation pipeline that pads the image to the specified size.
+        size=(512, 1024),  # The output size of padding.
+        pad_val=0,  # The padding value for image.
+        seg_pad_val=255),  # The padding value of 'gt_semantic_seg'.
+    dict(type='DefaultFormatBundle'),  # Default format bundle to gather data in the pipeline
+    dict(type='Collect',  # Pipeline that decides which keys in the data should be passed to the segmentor
+        keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),  # First pipeline to load images from file path
+    dict(
+        type='MultiScaleFlipAug',  # A wrapper that encapsulates the test time augmentations
+        img_scale=(2048, 1024),  # Decides the largest scale for testing, used for the Resize pipeline
+        flip=False,  # Whether to flip images during testing
+        transforms=[
+            dict(type='Resize',  # Use resize augmentation
+                keep_ratio=True),  # Whether to keep the ratio between height and width; the img_scale set here will be suppressed by the img_scale set above.
+            dict(type='RandomFlip'),  # Although RandomFlip is added to the pipeline, it is not used when flip=False
+            dict(
+                type='Normalize',  # Normalization config, the values are from img_norm_cfg
+                mean=[123.675, 116.28, 103.53],
+                std=[58.395, 57.12, 57.375],
+                to_rgb=True),
+            dict(type='ImageToTensor',  # Convert image to tensor
+                keys=['img']),
+            dict(type='Collect',  # Collect pipeline that collects the necessary keys for testing.
+                keys=['img'])
+        ])
+]
+data = dict(
+    samples_per_gpu=2,  # Batch size of a single GPU
+    workers_per_gpu=2,  # Workers to pre-fetch data for each single GPU
+    train=dict(  # Train dataset config
+        type='CityscapesDataset',  # Type of dataset, refer to mmseg/datasets/ for details.
+        data_root='data/cityscapes/',  # The root of dataset.
+        img_dir='leftImg8bit/train',  # The image directory of dataset.
+        ann_dir='gtFine/train',  # The annotation directory of dataset.
+        pipeline=[  # pipeline, this is passed by the train_pipeline created before.
+ dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( # Validation dataset config + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ # Pipeline is passed by test_pipeline created before + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +log_config = dict( # config to register logger hook + interval=50, # Interval to print the log + hooks=[ + # dict(type='TensorboardLoggerHook') # The Tensorboard logger is also supported + dict(type='TextLoggerHook', by_epoch=False) + ]) +dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set. +log_level = 'INFO' # The level of logging. +load_from = None # load models as a pre-trained model from a given path. This will not resume training. +resume_from = None # Resume checkpoints from a given path, the training will be resumed from the iteration when the checkpoint's is saved. +workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once. The workflow trains the model by 40000 iterations according to the `runner.max_iters`. +cudnn_benchmark = True # Whether use cudnn_benchmark to speed up, which is fast for fixed input size. +optimizer = dict( # Config used to build optimizer, support all the optimizers in PyTorch whose arguments are also the same as those in PyTorch + type='SGD', # Type of optimizers, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details + lr=0.01, # Learning rate of optimizers, see detail usages of the parameters in the documentation of PyTorch + momentum=0.9, # Momentum + weight_decay=0.0005) # Weight decay of SGD +optimizer_config = dict() # Config used to build the optimizer hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/optimizer.py#L8 for implementation details. +lr_config = dict( + policy='poly', # The policy of scheduler, also support Step, CosineAnnealing, Cyclic, etc. 
Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9.
+    power=0.9,  # The power of polynomial decay.
+    min_lr=0.0001,  # The minimum learning rate to stabilize the training.
+    by_epoch=False)  # Whether to count by epoch or not.
+runner = dict(
+    type='IterBasedRunner',  # Type of runner to use (i.e. IterBasedRunner or EpochBasedRunner)
+    max_iters=40000)  # Total number of iterations. For EpochBasedRunner use `max_epochs`
+checkpoint_config = dict(  # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation.
+    by_epoch=False,  # Whether to count by epoch or not.
+    interval=4000)  # The save interval.
+evaluation = dict(  # The config to build the evaluation hook. Please refer to mmseg/core/evaluation/eval_hook.py for details.
+    interval=4000,  # The interval of evaluation.
+    metric='mIoU')  # The evaluation metric.
+
+
+```
+
+## FAQ
+
+### Ignore some fields in the base configs
+
+Sometimes, you may set `_delete_=True` to ignore some of the fields in the base configs.
+You may refer to [mmcv](https://mmcv.readthedocs.io/en/latest/utils.html#inherit-from-base-config-with-ignored-fields) for a simple illustration.
+
+In MMSegmentation, for example, suppose you would like to change the backbone of PSPNet, whose config is the following.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='torchvision://resnet50',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(...),
+    auxiliary_head=dict(...))
+```
+
+Since `ResNet` and `HRNet` use different keywords to construct, the whole `backbone` field needs to be replaced.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w32',
+    backbone=dict(
+        _delete_=True,
+        type='HRNet',
+        norm_cfg=norm_cfg,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(32, 64)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(32, 64, 128)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(32, 64, 128, 256)))),
+    decode_head=dict(...),
+    auxiliary_head=dict(...))
+```
+
+The `_delete_=True` replaces all old keys in the `backbone` field with the new keys.
+
+### Use intermediate variables in configs
+
+Some intermediate variables are used in the config files, like `train_pipeline`/`test_pipeline` in datasets.
+It's worth noting that when modifying intermediate variables in the children configs, users need to pass the intermediate variables into the corresponding fields again.
+For example, suppose we would like to change the multi-scale strategy to train/test a PSPNet. `train_pipeline`/`test_pipeline` are the intermediate variables we would like to modify.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py'
+crop_size = (512, 1024)
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(1.0, 2.0)),  # change to [1., 2.]
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],  # change to multi scale testing
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+```
+
+We first define the new `train_pipeline`/`test_pipeline` and pass them into `data`.
+
+Similarly, if we would like to switch from `SyncBN` to `BN` or `MMSyncBN`, we need to substitute every `norm_cfg` in the config.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py'
+norm_cfg = dict(type='BN', requires_grad=True)
+model = dict(
+    backbone=dict(norm_cfg=norm_cfg),
+    decode_head=dict(norm_cfg=norm_cfg),
+    auxiliary_head=dict(norm_cfg=norm_cfg))
+```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_datasets.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_datasets.md
new file mode 100644
index 0000000000000000000000000000000000000000..020d51316e15a7f6926f49d81dcd2509f5170e07
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_datasets.md
@@ -0,0 +1,172 @@
+# Tutorial 2: Customize Datasets
+
+## Customize datasets by reorganizing data
+
+The simplest way is to reorganize your data into the folder structure below.
+
+An example of the file structure is as follows.
+
+```none
+├── data
+│ ├── my_dataset
+│ │ ├── img_dir
+│ │ │ ├── train
+│ │ │ │ ├── xxx{img_suffix}
+│ │ │ │ ├── yyy{img_suffix}
+│ │ │ │ ├── zzz{img_suffix}
+│ │ │ ├── val
+│ │ ├── ann_dir
+│ │ │ ├── train
+│ │ │ │ ├── xxx{seg_map_suffix}
+│ │ │ │ ├── yyy{seg_map_suffix}
+│ │ │ │ ├── zzz{seg_map_suffix}
+│ │ │ ├── val
+
+```
+
+A training pair will consist of the files with the same suffix in img_dir/ann_dir.
+
+If the `split` argument is given, only part of the files in img_dir/ann_dir will be loaded.
+We may specify the prefix of the files we would like to be included in the split txt.
+
+More specifically, for a split txt like the following,
+
+```none
+xxx
+zzz
+```
+
+Only
+`data/my_dataset/img_dir/train/xxx{img_suffix}`,
+`data/my_dataset/img_dir/train/zzz{img_suffix}`,
+`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`,
+`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` will be loaded.
+
+Note: The annotations are images of shape (H, W), whose pixel values should fall in the range `[0, num_classes - 1]`.
+You may use the `'P'` mode of [pillow](https://pillow.readthedocs.io/en/stable/handbook/concepts.html#palette) to create your annotation images with color.
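+
+As a hedged illustration of such a `'P'`-mode annotation (the classes, palette, and path below are arbitrary):
+
+```python
+import numpy as np
+from PIL import Image
+
+# A dummy (H, W) label map whose pixel values lie in [0, num_classes - 1].
+seg_map = np.zeros((512, 512), dtype=np.uint8)
+seg_map[100:200, 100:200] = 1  # mark a square of class 1
+
+ann = Image.fromarray(seg_map).convert('P')
+# The palette only affects visualization; the stored pixel values are what
+# the dataset loader actually reads as class indices.
+palette = [0, 0, 0, 128, 0, 0]  # class 0 -> black, class 1 -> dark red
+ann.putpalette(palette + [0] * (768 - len(palette)))
+ann.save('data/my_dataset/ann_dir/train/xxx.png')
+```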
+
+## Customize datasets by mixing dataset
+
+MMSegmentation also supports mixing datasets for training.
+Currently it supports concatenating and repeating datasets.
+
+### Repeat dataset
+
+We use `RepeatDataset` as a wrapper to repeat the dataset.
+For example, suppose the original dataset is `Dataset_A`. To repeat it, the config looks like the following
+
+```python
+dataset_A_train = dict(
+        type='RepeatDataset',
+        times=N,
+        dataset=dict(  # This is the original config of Dataset_A
+            type='Dataset_A',
+            ...
+            pipeline=train_pipeline
+        )
+    )
+```
+
+### Concatenate dataset
+
+There are 2 ways to concatenate the dataset.
+
+1. If the datasets you want to concatenate are of the same type but have different annotation files,
+   you can concatenate the dataset configs like the following.
+
+   1. You may concatenate two `ann_dir`.
+
+      ```python
+      dataset_A_train = dict(
+          type='Dataset_A',
+          img_dir = 'img_dir',
+          ann_dir = ['anno_dir_1', 'anno_dir_2'],
+          pipeline=train_pipeline
+      )
+      ```
+
+   2. You may concatenate two `split`.
+
+      ```python
+      dataset_A_train = dict(
+          type='Dataset_A',
+          img_dir = 'img_dir',
+          ann_dir = 'anno_dir',
+          split = ['split_1.txt', 'split_2.txt'],
+          pipeline=train_pipeline
+      )
+      ```
+
+   3. You may concatenate two `ann_dir` and `split` simultaneously.
+
+      ```python
+      dataset_A_train = dict(
+          type='Dataset_A',
+          img_dir = 'img_dir',
+          ann_dir = ['anno_dir_1', 'anno_dir_2'],
+          split = ['split_1.txt', 'split_2.txt'],
+          pipeline=train_pipeline
+      )
+      ```
+
+   In this case, `anno_dir_1` and `anno_dir_2` correspond to `split_1.txt` and `split_2.txt`.
+
+2. In case the datasets you want to concatenate are of different types, you can concatenate the dataset configs like the following.
+
+   ```python
+   dataset_A_train = dict()
+   dataset_B_train = dict()
+
+   data = dict(
+       imgs_per_gpu=2,
+       workers_per_gpu=2,
+       train = [
+           dataset_A_train,
+           dataset_B_train
+       ],
+       val = dataset_A_val,
+       test = dataset_A_test
+       )
+   ```
+
+A more complex example that repeats `Dataset_A` and `Dataset_B` by N and M times, respectively, and then concatenates the repeated datasets is as follows.
+
+```python
+dataset_A_train = dict(
+    type='RepeatDataset',
+    times=N,
+    dataset=dict(
+        type='Dataset_A',
+        ...
+        pipeline=train_pipeline
+    )
+)
+dataset_A_val = dict(
+    ...
+    pipeline=test_pipeline
+)
+dataset_A_test = dict(
+    ...
+    pipeline=test_pipeline
+)
+dataset_B_train = dict(
+    type='RepeatDataset',
+    times=M,
+    dataset=dict(
+        type='Dataset_B',
+        ...
+        pipeline=train_pipeline
+    )
+)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train = [
+        dataset_A_train,
+        dataset_B_train
+    ],
+    val = dataset_A_val,
+    test = dataset_A_test
+)
+
+```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_models.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_models.md
new file mode 100644
index 0000000000000000000000000000000000000000..f637fd6f0431ea0de748e39806be51d1b4849c8e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_models.md
@@ -0,0 +1,234 @@
+# Tutorial 4: Customize Models
+
+## Customize optimizer
+
+Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
+You need to first implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`:
+
+```python
+from mmcv.runner import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class MyOptimizer(Optimizer):
+
+    def __init__(self, a, b, c):
+        ...
+
+```
+
+Then add this module in `mmseg/core/optimizer/__init__.py` so that the registry will
+find the new module and add it:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+Then you can use `MyOptimizer` in the `optimizer` field of config files.
+In the configs, the optimizers are defined by the field `optimizer` like the following:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+To use your own optimizer, the field can be changed as
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+We already support using all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files.
+For example, if you want to use `Adam` (though the performance may drop a lot), the modification could be as follows.
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
+
+## Customize optimizer constructor
+
+Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
+The users can do such fine-grained parameter tuning through customizing the optimizer constructor.
+
+```python
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner import OPTIMIZER_BUILDERS
+from .cocktail_optimizer import CocktailOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class CocktailOptimizerConstructor(object):
+
+    def __init__(self, optimizer_cfg, paramwise_cfg=None):
+        ...
+
+    def __call__(self, model):
+        ...
+        return my_optimizer
+
+```
+
+## Develop new components
+
+There are mainly 2 types of components in MMSegmentation.
+
+- backbone: usually stacks of convolutional networks to extract feature maps, e.g., ResNet, HRNet.
+- head: the component for semantic segmentation map decoding.
+
+### Add new backbones
+
+Here we show how to develop new components with an example of MobileNet.
+
+1. Create a new file `mmseg/models/backbones/mobilenet.py`.
+
+```python
+import torch.nn as nn
+
+from ..registry import BACKBONES
+
+
+@BACKBONES.register_module()
+class MobileNet(nn.Module):
+
+    def __init__(self, arg1, arg2):
+        pass
+
+    def forward(self, x):  # should return a tuple
+        pass
+
+    def init_weights(self, pretrained=None):
+        pass
+```
+
+2. Import the module in `mmseg/models/backbones/__init__.py`.
+
+```python
+from .mobilenet import MobileNet
+```
+
+3. Use it in your config file.
+
+```python
+model = dict(
+    ...
+    backbone=dict(
+        type='MobileNet',
+        arg1=xxx,
+        arg2=xxx),
+    ...
+```
+
+### Add new heads
+
+In MMSegmentation, we provide a base [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) for all segmentation heads.
+All newly implemented decode heads should be derived from it.
+Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as follows.
+
+First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
+PSPNet implements a decode head for segmentation decoding.
+
+### Add new heads
+
+In MMSegmentation, we provide a base [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) for all segmentation heads.
+All newly implemented decode heads should be derived from it.
+Here we show how to develop a new head with [PSPNet](https://arxiv.org/abs/1612.01105) as an example.
+
+First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
+PSPNet implements a decode head for segmentation decoding.
+To implement a decode head, we basically need to implement three functions of the new module, as shown below.
+
+```python
+@HEADS.register_module()
+class PSPHead(BaseDecodeHead):
+
+    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+        super(PSPHead, self).__init__(**kwargs)
+        ...
+
+    def init_weights(self):
+        ...
+
+    def forward(self, inputs):
+        ...
+```
+
+Next, the users need to add the module in `mmseg/models/decode_heads/__init__.py` so that the corresponding registry can find and load it.
+
+The config file of PSPNet is as follows
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='PSPHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        pool_scales=(1, 2, 3, 6),
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
+```
+
+### Add new loss
+
+Assume you want to add a new loss `MyLoss` for segmentation decoding.
+To add a new loss function, the users need to implement it in `mmseg/models/losses/my_loss.py`.
+The decorator `weighted_loss` enables the loss to be weighted for each element.
+
+```python
+import torch
+import torch.nn as nn
+
+from ..builder import LOSSES
+from .utils import weighted_loss
+
+
+@weighted_loss
+def my_loss(pred, target):
+    assert pred.size() == target.size() and target.numel() > 0
+    loss = torch.abs(pred - target)
+    return loss
+
+
+@LOSSES.register_module()
+class MyLoss(nn.Module):
+
+    def __init__(self, reduction='mean', loss_weight=1.0):
+        super(MyLoss, self).__init__()
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+
+    def forward(self,
+                pred,
+                target,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None):
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        reduction = (
+            reduction_override if reduction_override else self.reduction)
+        loss = self.loss_weight * my_loss(
+            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
+        return loss
+```
+
+Then the users need to add it in `mmseg/models/losses/__init__.py`.
+
+```python
+from .my_loss import MyLoss, my_loss
+```
+
+To use it, modify the `loss_decode` field in the head.
+`loss_weight` could be used to balance multiple losses.
+
+```python
+loss_decode=dict(type='MyLoss', loss_weight=1.0)
+```
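+As a quick sanity check (a hypothetical snippet, assuming the loss above is on the import path and registered), the new loss can be built from its config and called directly:
+
+```python
+import torch
+
+from mmseg.models import build_loss
+
+# Build MyLoss from a config dict, exactly as the head would.
+criterion = build_loss(dict(type='MyLoss', loss_weight=1.0))
+pred = torch.rand(2, 19, 16, 16)
+target = torch.rand(2, 19, 16, 16)
+print(criterion(pred, target))  # scalar: mean |pred - target| * loss_weight
+```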
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_runtime.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_runtime.md
new file mode 100644
index 0000000000000000000000000000000000000000..dd67ef54f639fa0b3b9a01727d16352752d30899
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/customize_runtime.md
@@ -0,0 +1,243 @@
+# Tutorial 6: Customize Runtime Settings
+
+## Customize optimization settings
+
+### Customize optimizer supported by PyTorch
+
+We already support all the optimizers implemented by PyTorch; the only modification needed is to change the `optimizer` field of the config file.
+For example, if you want to use `Adam` (note that the performance could drop a lot), the modification could be as follows.
+
+```python
+optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
+```
+
+To modify the learning rate of the model, the users only need to modify the `lr` in the config of the optimizer. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
+
+### Customize self-implemented optimizer
+
+#### 1. Define a new optimizer
+
+A customized optimizer could be defined as follows.
+
+Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
+You need to create a new directory named `mmseg/core/optimizer`,
+and then implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`:
+
+```python
+from .registry import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class MyOptimizer(Optimizer):
+
+    def __init__(self, a, b, c):
+        ...
+```
+
+#### 2. Add the optimizer to registry
+
+To find the module defined above, it should first be imported into the main namespace. There are two options to achieve this.
+
+- Modify `mmseg/core/optimizer/__init__.py` to import it.
+
+  The newly defined module should be imported in `mmseg/core/optimizer/__init__.py` so that the registry will
+  find the new module and add it:
+
+```python
+from .my_optimizer import MyOptimizer
+```
+
+- Use `custom_imports` in the config to manually import it
+
+```python
+custom_imports = dict(imports=['mmseg.core.optimizer.my_optimizer'], allow_failed_imports=False)
+```
+
+The module `mmseg.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered.
+Note that only the package containing the class `MyOptimizer` should be imported.
+`mmseg.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly.
+
+Actually, users can use a totally different file directory structure with this importing method, as long as the module root can be located in `PYTHONPATH`.
+
+#### 3. Specify the optimizer in the config file
+
+Then you can use `MyOptimizer` in the `optimizer` field of config files.
+In the configs, the optimizers are defined by the field `optimizer` like the following:
+
+```python
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+```
+
+To use your own optimizer, the field can be changed to
+
+```python
+optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
+```
+
+### Customize optimizer constructor
+
+Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
+The users can do such fine-grained parameter tuning by customizing the optimizer constructor.
+
+```python
+from mmcv.utils import build_from_cfg
+
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
+from mmseg.utils import get_root_logger
+from .my_optimizer import MyOptimizer
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class MyOptimizerConstructor(object):
+
+    def __init__(self, optimizer_cfg, paramwise_cfg=None):
+        ...
+
+    def __call__(self, model):
+        ...
+        return my_optimizer
+```
+
+The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11), and it could also serve as a template for new optimizer constructors.
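+As a concrete illustration, below is a minimal sketch of a custom constructor that disables weight decay for normalization layers (the class name and the grouping rule are made up for illustration; a real constructor would also need to handle biases, frozen parameters, etc.):
+
+```python
+import torch.nn as nn
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
+from mmcv.utils import build_from_cfg
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class NoDecayNormOptimizerConstructor(object):
+
+    def __init__(self, optimizer_cfg, paramwise_cfg=None):
+        self.optimizer_cfg = optimizer_cfg
+
+    def __call__(self, model):
+        norm_params, other_params = [], []
+        for module in model.modules():
+            params = list(module.parameters(recurse=False))
+            if isinstance(module, (nn.modules.batchnorm._BatchNorm,
+                                   nn.GroupNorm, nn.LayerNorm)):
+                norm_params += params
+            else:
+                other_params += params
+        optimizer_cfg = self.optimizer_cfg.copy()
+        # Two parameter groups: norm parameters get zero weight decay.
+        optimizer_cfg['params'] = [
+            dict(params=other_params),
+            dict(params=norm_params, weight_decay=0.)
+        ]
+        return build_from_cfg(optimizer_cfg, OPTIMIZERS)
+```
+
+It would then be selected through the `constructor` key of the optimizer config, e.g. `optimizer = dict(constructor='NoDecayNormOptimizerConstructor', type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)`.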
+
+### Additional settings
+
+Tricks not implemented by the optimizer should be implemented through the optimizer constructor (e.g., parameter-wise learning rates) or hooks. We list some common settings that could stabilize or accelerate training. Feel free to create a PR or issue for more settings.
+
+- __Use gradient clip to stabilize training__:
+  Some models need gradient clipping to stabilize the training process. An example is as below:
+
+  ```python
+  optimizer_config = dict(
+      _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
+  ```
+
+  If your config inherits a base config which already sets `optimizer_config`, you might need `_delete_=True` to override the unnecessary settings. See the [config documentation](https://mmsegmentation.readthedocs.io/en/latest/config.html) for more details.
+
+- __Use momentum schedule to accelerate model convergence__:
+  We support a momentum scheduler that modifies the model's momentum according to the learning rate, which could make the model converge faster.
+  A momentum scheduler is usually used together with an LR scheduler; for example, the following config is used in 3D detection to accelerate convergence.
+  For more details, please refer to the implementation of [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327) and [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130).
+
+  ```python
+  lr_config = dict(
+      policy='cyclic',
+      target_ratio=(10, 1e-4),
+      cyclic_times=1,
+      step_ratio_up=0.4,
+  )
+  momentum_config = dict(
+      policy='cyclic',
+      target_ratio=(0.85 / 0.95, 1),
+      cyclic_times=1,
+      step_ratio_up=0.4,
+  )
+  ```
+
+## Customize training schedules
+
+By default we use the poly learning rate policy with a 40k/80k schedule; this calls [`PolyLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/826d3a7b68596c824fa1e2cb89b6ac274f52179c/mmcv/runner/hooks/lr_updater.py#L196) in MMCV.
+We support many other learning rate schedules [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), such as the `CosineAnnealing` and `Poly` schedules. Here are some examples
+
+- Step schedule:
+
+  ```python
+  lr_config = dict(policy='step', step=[9, 10])
+  ```
+
+- CosineAnnealing schedule:
+
+  ```python
+  lr_config = dict(
+      policy='CosineAnnealing',
+      warmup='linear',
+      warmup_iters=1000,
+      warmup_ratio=1.0 / 10,
+      min_lr_ratio=1e-5)
+  ```
+
+## Customize workflow
+
+Workflow is a list of (phase, epochs) pairs that specifies the running order and the number of epochs per phase.
+By default it is set to
+
+```python
+workflow = [('train', 1)]
+```
+
+which means running 1 epoch for training.
+Sometimes the user may want to check some metrics (e.g. loss, accuracy) of the model on the validation set.
+In such a case, we can set the workflow as
+
+```python
+[('train', 1), ('val', 1)]
+```
+
+so that 1 epoch of training and 1 epoch of validation are run iteratively.
+
+**Note**:
+
+1. The parameters of the model will not be updated during a val epoch.
+2. The keyword `total_epochs` in the config only controls the number of training epochs and will not affect the validation workflow.
+3. Workflows `[('train', 1), ('val', 1)]` and `[('train', 1)]` will not change the behavior of `EvalHook` because `EvalHook` is called by `after_train_epoch`, and the validation workflow only affects hooks that are called through `after_val_epoch`. Therefore, the only difference between `[('train', 1), ('val', 1)]` and `[('train', 1)]` is that the runner will calculate losses on the validation set after each training epoch.
+
+## Customize hooks
+
+### Use hooks implemented in MMCV
+
+If the hook is already implemented in MMCV, you can directly modify the config to use it as below
+
+```python
+custom_hooks = [
+    dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
+]
+```
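+For reference, a custom hook such as the `MyHook` used above would be implemented and registered roughly as follows (a minimal sketch; the constructor arguments and the chosen hook point are illustrative):
+
+```python
+from mmcv.runner import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class MyHook(Hook):
+
+    def __init__(self, a, b):
+        self.a = a
+        self.b = b
+
+    def after_train_iter(self, runner):
+        # Runs after every training iteration; e.g. inspect
+        # runner.outputs or write to runner.log_buffer here.
+        pass
+```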
+
+### Modify default runtime hooks
+
+There are some common hooks that are not registered through `custom_hooks`; they are
+
+- log_config
+- checkpoint_config
+- evaluation
+- lr_config
+- optimizer_config
+- momentum_config
+
+Among those hooks, only the logger hook has `VERY_LOW` priority; the others' priority is `NORMAL`.
+The above-mentioned tutorials already cover how to modify `optimizer_config`, `momentum_config`, and `lr_config`.
+Here we show what we can do with `log_config`, `checkpoint_config`, and `evaluation`.
+
+#### Checkpoint config
+
+The MMCV runner will use `checkpoint_config` to initialize [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9).
+
+```python
+checkpoint_config = dict(interval=1)
+```
+
+The users could set `max_keep_ckpts` to save only a small number of checkpoints, or decide whether to store the state dict of the optimizer via `save_optimizer`. More details of the arguments are [here](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook).
+
+#### Log config
+
+The `log_config` wraps multiple logger hooks and enables setting intervals. Currently MMCV supports `WandbLoggerHook`, `MlflowLoggerHook`, and `TensorboardLoggerHook`.
+The detailed usage can be found in the [doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook).
+
+```python
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+```
+
+#### Evaluation config
+
+The config of `evaluation` will be used to initialize the [`EvalHook`](https://github.com/open-mmlab/mmsegmentation/blob/e3f6f655d69b777341aec2fe8829871cc0beadcb/mmseg/core/evaluation/eval_hooks.py#L7).
+Except for the key `interval`, other arguments such as `metric` will be passed to `dataset.evaluate()`.
+
+```python
+evaluation = dict(interval=1, metric='mIoU')
+```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/data_pipeline.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/data_pipeline.md
new file mode 100644
index 0000000000000000000000000000000000000000..1eecfe91d433ca897ae47a5ae9b8c5f406a3fe2b
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/data_pipeline.md
@@ -0,0 +1,171 @@
+# Tutorial 3: Customize Data Pipelines
+
+## Design of Data pipelines
+
+Following typical conventions, we use `Dataset` and `DataLoader` for data loading
+with multiple workers. `Dataset` returns a dict of data items corresponding to
+the arguments of the models' forward method.
+Since the data in semantic segmentation may not be the same size,
+we introduce a new `DataContainer` type in MMCV to help collect and distribute
+data of different sizes.
+See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
+
+The data preparation pipeline and the dataset are decoupled. Usually a dataset
+defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict.
+A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform.
+
+The operations are categorized into data loading, pre-processing, formatting and test-time augmentation.
+
+Here is a pipeline example for PSPNet.
+
+```python
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+```
+
+For each operation, we list the related dict fields that are added/updated/removed.
+
+### Data loading
+
+`LoadImageFromFile`
+
+- add: img, img_shape, ori_shape
+
+`LoadAnnotations`
+
+- add: gt_semantic_seg, seg_fields
+
+### Pre-processing
+
+`Resize`
+
+- add: scale, scale_idx, pad_shape, scale_factor, keep_ratio
+- update: img, img_shape, *seg_fields
+
+`RandomFlip`
+
+- add: flip
+- update: img, *seg_fields
+
+`Pad`
+
+- add: pad_fixed_size, pad_size_divisor
+- update: img, pad_shape, *seg_fields
+
+`RandomCrop`
+
+- update: img, pad_shape, *seg_fields
+
+`Normalize`
+
+- add: img_norm_cfg
+- update: img
+
+`SegRescale`
+
+- update: gt_semantic_seg
+
+`PhotoMetricDistortion`
+
+- update: img
+
+### Formatting
+
+`ToTensor`
+
+- update: specified by `keys`.
+
+`ImageToTensor`
+
+- update: specified by `keys`.
+
+`Transpose`
+
+- update: specified by `keys`.
+
+`ToDataContainer`
+
+- update: specified by `fields`.
+
+`DefaultFormatBundle`
+
+- update: img, gt_semantic_seg
+
+`Collect`
+
+- add: img_meta (the keys of img_meta are specified by `meta_keys`)
+- remove: all other keys except for those specified by `keys`
+
+### Test time augmentation
+
+`MultiScaleFlipAug`
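+By default the configs in this repository run single-scale testing (`flip=False` and no `img_ratios`). Multi-scale and flip test-time augmentation can be switched on by listing the ratios and setting `flip=True`; the values below are the ones commented out in the configs of this repository:
+
+```python
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],  # multi-scale testing
+        flip=True,  # also average over horizontal flips
+        transforms=[  # reuses img_norm_cfg from the example above
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+```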
+
+## Extend and use custom pipelines
+
+1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict.
+
+    ```python
+    from mmseg.datasets import PIPELINES
+
+
+    @PIPELINES.register_module()
+    class MyTransform:
+
+        def __call__(self, results):
+            results['dummy'] = True
+            return results
+    ```
+
+2. Import the new class.
+
+    ```python
+    from .my_pipeline import MyTransform
+    ```
+
+3. Use it in config files.
+
+    ```python
+    img_norm_cfg = dict(
+        mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+    crop_size = (512, 1024)
+    train_pipeline = [
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations'),
+        dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+        dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(type='Normalize', **img_norm_cfg),
+        dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+        dict(type='MyTransform'),
+        dict(type='DefaultFormatBundle'),
+        dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+    ]
+    ```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/training_tricks.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/training_tricks.md
new file mode 100644
index 0000000000000000000000000000000000000000..98a201fa649d94525facd8237d32effd6560c658
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/tutorials/training_tricks.md
@@ -0,0 +1,52 @@
+# Tutorial 5: Training Tricks
+
+MMSegmentation supports the following training tricks out of the box.
+
+## Different Learning Rate (LR) for Backbone and Heads
+
+In semantic segmentation, some methods make the LR of the heads larger than that of the backbone to achieve better performance or faster convergence.
+
+In MMSegmentation, you may add the following lines to the config to make the LR of the heads 10 times that of the backbone.
+
+```python
+optimizer=dict(
+    paramwise_cfg = dict(
+        custom_keys={
+            'head': dict(lr_mult=10.)}))
+```
+
+With this modification, the LR of any parameter group with `'head'` in its name will be multiplied by 10.
+You may refer to the [MMCV doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) for further details.
+
+## Online Hard Example Mining (OHEM)
+
+We implement pixel samplers [here](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) for training-time sampling.
+Here is an example config that trains PSPNet with OHEM enabled.
+
+```python
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model=dict(
+    decode_head=dict(
+        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)))
+```
+
+In this way, only pixels with a confidence score under 0.7 are used for training, and we keep at least 100000 pixels during training. If `thresh` is not specified, the pixels with the top `min_kept` losses will be selected.
+
+## Class Balanced Loss
+
+For datasets with an unbalanced class distribution, you may change the loss weight of each class.
+Here is an example for the Cityscapes dataset.
+
+```python
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model=dict(
+    decode_head=dict(
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0,
+            # DeepLab used this class weight for cityscapes
+            class_weight=[0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754,
+                          1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037,
+                          1.0865, 1.0955, 1.0865, 1.1529, 1.0507])))
+```
+
+`class_weight` will be passed into `CrossEntropyLoss` as the `weight` argument. Please refer to the [PyTorch Doc](https://pytorch.org/docs/stable/nn.html?highlight=crossentropy#torch.nn.CrossEntropyLoss) for details.
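+The weights above are precomputed constants. If you need weights for your own dataset, one common recipe (not the one behind the DeepLab weights above) is median frequency balancing over the training labels, e.g.:
+
+```python
+import numpy as np
+
+
+def median_frequency_weights(pixel_counts):
+    """Per-class weights from total pixel counts per class.
+
+    Rare classes get weights > 1, frequent classes < 1.
+    Assumes every class appears at least once in the train set.
+    """
+    pixel_counts = np.asarray(pixel_counts, dtype=np.float64)
+    freq = pixel_counts / pixel_counts.sum()
+    return (np.median(freq) / freq).tolist()
+
+
+# class_weight = median_frequency_weights(counts_over_your_train_set)
+```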
diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/useful_tools.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/useful_tools.md
new file mode 100644
index 0000000000000000000000000000000000000000..514b5680ee4eff351affd3bfa169c11d5dee9b9f
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/docs/useful_tools.md
@@ -0,0 +1,64 @@
+Apart from training/testing scripts, we provide lots of useful tools under the
+`tools/` directory.
+
+### Get the FLOPs and params (experimental)
+
+We provide a script adapted from [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) to compute the FLOPs and params of a given model.
+
+```shell
+python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
+```
+
+You will get a result like this.
+
+```none
+==============================
+Input shape: (3, 2048, 1024)
+Flops: 1429.68 GMac
+Params: 48.98 M
+==============================
+```
+
+**Note**: This tool is still experimental and we do not guarantee that the number is correct. You may well use the result for simple comparisons, but double check it before you adopt it in technical reports or papers.
+
+(1) FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 1280, 800).
+(2) Some operators, such as GN and custom operators, are not counted into FLOPs.
+
+### Publish a model
+
+Before you upload a model to AWS, you may want to
+(1) convert the model weights to CPU tensors, (2) delete the optimizer states and
+(3) compute the hash of the checkpoint file and append the hash id to the filename.
+
+```shell
+python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME}
+```
+
+E.g.,
+
+```shell
+python tools/publish_model.py work_dirs/pspnet/latest.pth psp_r50_hszhao_200ep.pth
+```
+
+The final output filename will be `psp_r50_512x1024_40ki_cityscapes-{hash id}.pth`.
+
+### Convert to ONNX (experimental)
+
+We provide a script to convert a model to [ONNX](https://github.com/onnx/onnx) format. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between the PyTorch and ONNX models.
+
+```shell
+python tools/pytorch2onnx.py ${CONFIG_FILE} --checkpoint ${CHECKPOINT_FILE} --output-file ${ONNX_FILE} [--shape ${INPUT_SHAPE} --verify]
+```
+
+**Note**: This tool is still experimental. Some customized operators are not supported for now.
+
+## Miscellaneous
+
+### Print the entire config
+
+`tools/print_config.py` prints the whole config verbatim, expanding all its imports.
+ +```shell +python tools/print_config.py ${CONFIG} [-h] [--options ${OPTIONS [OPTIONS...]}] +``` diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..efc8b4bb20c981f3db6df7eb52b3dc0744c94cc0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..27fac27761ca53368d9f8cba78f1d68bc1d10416 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/ade20k_repeat.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + 
transforms=[ + dict(type='AlignedResize', keep_ratio=True, size_divisor=32), # Ensure the long and short sides are divisible by 32 + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=50, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/chase_db1.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..298594ea925f87f22b37094a2ec50e370aec96a0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f21867c63e1835f6fceb61f066e802fd8fd2a735 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 
116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_1024x1024_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_1024x1024_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..fdf36ee3cf719f82effc2559704d5270dce60fe0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_1024x1024_repeat.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = '' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (1024, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=500, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + 
pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_768x768_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_768x768_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..ca140341a05a5f8eaf1bb9df3dea215f44a9ff21 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_768x768_repeat.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=500, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..ef92413029c14d9e9482e729add52cd88aaeffb4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/cityscapes_repeat.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', 
keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=300, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/drive.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..06e8ff606e0d2a4514ec8b7d2c6c436a32efcbf4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/hrf.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..242d790eb1b83e75cf6b7eaa7a35c674099311ad --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + 
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_1024x1024_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_1024x1024_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..58c9c32daf55b8a0a3c3c274c10a3d9d16b8b2c2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_1024x1024_repeat.py @@ -0,0 +1,58 @@ +# dataset settings +dataset_type = 'MapillaryDataset' +data_root = 'data/Mapillary/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (1024, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='MaillaryHack'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 1.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='AlignedResize', keep_ratio=True, size_divisor=32), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=100, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='training/images', + ann_dir='training/labels', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='validation/images', + ann_dir='validation/labels', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='validation/images', + ann_dir='validation/labels', + 
pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_768x768_repeat.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_768x768_repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..c032dc8b098f891ff2e74d170bd9c5a28252b504 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/mapillary_768x768_repeat.py @@ -0,0 +1,58 @@ +# dataset settings +dataset_type = 'MapillaryDataset' +data_root = 'data/Mapillary/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='MaillaryHack'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 1.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='AlignedResize', keep_ratio=True, size_divisor=32), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=100, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='training/images', + ann_dir='training/labels', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='validation/images', + ann_dir='validation/labels', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='validation/images', + ann_dir='validation/labels', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_context.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..ff65bad1b86d7e3a5980bb5b9fc55798dc8df5f4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 
0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1d42d0c5781f56dc177d860d856bb34adce555 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12_aug.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3f23b6717d53ad29f02dd15046802a2631a5076b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = 
'./pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/stare.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f71b25488cc11a6b4d582ac52b5a24e1ad1cf8e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/default_runtime.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..b564cc4e7e7d9a67dacaaddecb100e4d8f5c005b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ann_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb653827e44e6015b3b83bc578003e614a6aa1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = 
dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/apcnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f5316cbcf3896ba9de7ca2c801eba512f01d5e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ccnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..794148f576b9e215c3c6963e73dffe98204b7717 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + 
dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/cgnet.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..eff8d9458c877c5db894957e0b1b4597e40da6ab --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/danet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..2c934939fac48525f22ad86f489a041dd7db7d09 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_r50-d8.py 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a43bee01422ad4795dd27874e0cd4bb6cbfecf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd262999d8b2cb8e14a5c32190ae73f479d8e81 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3plus_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..050e39e091d816df9028d23aa3ecf9db74e441e1 --- /dev/null +++ 
b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dmnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d22ba52640bebd805b3b8d07025e276dfb023759 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dnl_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..edb4c174c51e34c103737ba39bfc48bf831e561d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + 
style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/emanet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..26adcd430926de0862204a71d345f2543167f27b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/encnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..be777123a886503172a95fe0719e956a147bbd68 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + 
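+ # With use_sigmoid=True the SE loss is a per-image multi-label BCE: EncNet's
+ # context-encoding branch predicts which classes occur in the image (weight 0.2).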
auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fast_scnn.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..32fdeb659355a5ce5ef2cc7c2f30742703811cdf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_hr18.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c3e299bc89ada56ca14bbffcbdb08a586b8ed9e9 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + 
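+ # input_transform='resize_concat' upsamples all four HRNet branch outputs to
+ # the highest resolution and concatenates them, hence
+ # channels=sum([18, 36, 72, 144]) (=270) feeding the 1x1 classifier conv.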
concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5e98f6cc918b6146fc6d613c6918e825ef1355c3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..a33e7972877f902d0e7d18401ca675e3e4e60a18 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fpn_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..86ab327db92e44c14822d65f1c9277cb007f17c1 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/gcnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2ad69f5c22adfe79d5fdabf920217628987166 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/lraspp_m-v3-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..93258242a90695cc94a7c6bd41562d6a75988771 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + 
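+ # Lite R-ASPP (LR-ASPP) head from the MobileNetV3 paper: a lightweight ASPP
+ # variant that gates features with a global-average-pooling attention branch.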
type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/nonlocal_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5674a39854cafd1f2e363bac99c58ccae62f24da --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_hr18.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c60f62a7cdf3f5c5096a7a7e725e8268fddcb057 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + 
type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..615aa3ff703942b6c22b2d6e9642504dd3e41ebd --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pointrend_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..9d323dbf9466d41e0800aa57ef84045f3d874bdf --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, 
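+ # PointRend point sampling: draw oversample_ratio * num_points candidates,
+ # keep the importance_sample_ratio fraction with the most uncertain coarse
+ # predictions, and fill the remainder with uniformly sampled points.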
oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/psanet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..689513fa9d2a40f14bf0ae4ae61f38f0dcc1b3da --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_r50-d8.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f451e08ad2eb0732dcb806b1851eb978d4acf136 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_unet_s5-d16.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 
0000000000000000000000000000000000000000..fcff9ec4f41fad158344ecd77313dc14564f3682 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/segformer.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/segformer.py new file mode 100644 index 0000000000000000000000000000000000000000..347ac3d9a462d4f1218e0baf52de4ca347680ca3 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/segformer.py @@ -0,0 +1,24 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='IMTRv21_5', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/upernet_r50.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..10974962fdd7136031fd06de1700f497d355ceaa --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + 
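+ # UPerHead (UPerNet) runs the pyramid pooling module (pool_scales above) on
+ # the deepest stage and fuses all four backbone stages in FPN fashion.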
num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..52603890b10f25faf8eec9f9e5a4468fae09b811 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k_adamw.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k_adamw.py new file mode 100644 index 0000000000000000000000000000000000000000..f8624ab699577cd27ad42bb4053b9a73678cde1b --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_160k_adamw.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=40000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_20k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bf780a1b6f6521833c6a5859675147824efa599d --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..cdbf841abcb26eed87bf76ab816aff4bae0630ee --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer 
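+# mmcv's 'poly' policy decays lr(iter) = base_lr * (1 - iter/max_iters)**power,
+# floored at min_lr; e.g. with lr=0.01, power=0.9 and max_iters=40000, the lr
+# at iter 20000 is 0.01 * (0.5 ** 0.9), about 0.0054.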
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k_adamw.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k_adamw.py new file mode 100644 index 0000000000000000000000000000000000000000..dc2fcd07a6a69e42494ae8f42aa6ddce96ee336f --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_40k_adamw.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..c190cee6bdc7922b688ea75dc8f152fa15c24617 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k_adamw.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k_adamw.py new file mode 100644 index 0000000000000000000000000000000000000000..b3d15f6c2dfb3332088af25b749f8c4c20e831b4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/_base_/schedules/schedule_80k_adamw.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='AdamW', lr=0.0002, weight_decay=0.0001) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..3f6614ba738d0042719896b6aef48e7906b48e12 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + 
'../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b0.pth', + backbone=dict( + type='mit_b0', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=4, workers_per_gpu=8) +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..0cdae7f11d8040d832b44e1af9cceeec5a649a08 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x1024.city.160k.py @@ -0,0 +1,106 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b0.pth', + backbone=dict( + type='mit_b0', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(1024, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1024, 512), + # img_ratios=[0.5, 0.75,
1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=500, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) + +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x512.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x512.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..ec024b85fdf7b8071b449fb1abb5318ffb34bf2c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.512x512.ade.160k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b0.pth', + backbone=dict( + type='mit_b0', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
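+ # custom_keys match by substring of the parameter name: 'head' parameters
+ # train with a 10x lr while 'norm' and 'pos_block' parameters are exempt
+ # from weight decay; _delete_=True above replaces, rather than merges with,
+ # the optimizer inherited from the _base_ schedule.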
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.640x1280.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.640x1280.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..c88ffbd8e46357ea3d71f1628bc2d6d8314a6520 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.640x1280.city.160k.py @@ -0,0 +1,105 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b0.pth', + backbone=dict( + type='mit_b0', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (640, 1280) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(1280, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1280, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=500, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) + +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.768x768.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.768x768.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..02be43c086efa84bb9f9e907b00a76d12fc87c43 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B0/segformer.b0.768x768.city.160k.py @@ -0,0 +1,106 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b0.pth', + backbone=dict( + type='mit_b0', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(768,768), stride=(768,768))) + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(1536, 768), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1536, 768), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=500, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) + +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..554492421daf3ca7a49b264860b7d90974c3bcf4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b1.pth', + backbone=dict( + type='mit_b1', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=1) +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
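+                                                 # note: test_cfg above uses mode='slide' -- inference runs
+                                                 # on 1024x1024 crops at stride 768 and averages overlapping
+                                                 # logits, keeping memory bounded on full-resolution
+                                                 # Cityscapes frames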
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..1b68633b9ecdb4a053ff0ec13bb8bc62a4e8a10e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b1.pth', + backbone=dict( + type='mit_b1', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..fc0f76e7a559bc2fc0b2bdf05f0f6fb9254dd33e --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b2.pth', + backbone=dict( + type='mit_b2', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=1) +evaluation = dict(interval=4000, 
metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.512x512.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.512x512.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..d7f736bacab6fb2784cc24fe8097874e80b90e8a --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B2/segformer.b2.512x512.ade.160k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b2.pth', + backbone=dict( + type='mit_b2', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + # type='MLPHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..0a2c47a9e7bda3a9b25e1f8d79e15ba1f80d9c27 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b3.pth', + backbone=dict( + type='mit_b3', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=1) +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.512x512.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.512x512.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..2fa27342c05ffc71a0b9a61ecb1a91cd2d2368a2 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B3/segformer.b3.512x512.ade.160k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b3.pth', + backbone=dict( + type='mit_b3', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..55d515de12af55fbd00be01bdfe94a839d4cc866 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b4.pth', + backbone=dict( + type='mit_b4', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=1) +evaluation = dict(interval=4000, 
metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.512x512.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.512x512.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..1bbb6f4e8441b547a9bdd09923a296c267ca4eb4 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B4/segformer.b4.512x512.ade.160k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b4.pth', + backbone=dict( + type='mit_b4', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.1024x1024.city.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.1024x1024.city.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..166651d759448ef22867387c231a63900a0a33e8 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.1024x1024.city.160k.py @@ -0,0 +1,51 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/cityscapes_1024x1024_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b5.pth', + backbone=dict( + type='mit_b5', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + # test_cfg=dict(mode='whole')) + test_cfg=dict(mode='slide', crop_size=(1024,1024), stride=(768,768))) + +# data +data = dict(samples_per_gpu=1) +evaluation = dict(interval=4000, metric='mIoU') + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
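+                                                 # find_unused_parameters=True (set above) lets
+                                                 # DistributedDataParallel tolerate parameters that
+                                                 # receive no gradient in a given forward pass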
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.640x640.ade.160k.py b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.640x640.ade.160k.py new file mode 100644 index 0000000000000000000000000000000000000000..d315f00a959a36666eaac469c601e9f99da76123 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/local_configs/segformer/B5/segformer.b5.640x640.ade.160k.py @@ -0,0 +1,105 @@ +_base_ = [ + '../../_base_/models/segformer.py', + # '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_160k_adamw.py' +] + +# data settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='AlignedResize', keep_ratio=True, size_divisor=32), # Ensure the long and short sides are divisible by 32 + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=50, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='pretrained/mit_b5.pth', + backbone=dict( + type='mit_b5', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=768), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
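+                                                 # the all-MLP decoder is widened to embed_dim=768 for
+                                                 # B2-B5 (the B0/B1 configs in this diff use 256), matching
+                                                 # the larger backbone channel widths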
+ })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +evaluation = dict(interval=16000, metric='mIoU') + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/MANIFEST.in b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..16f9cc8938d9ba15f999bf69343f492837604845 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/MANIFEST.in @@ -0,0 +1,5 @@ +include requirements/runtime.txt +include mmcv/model_zoo/open_mmlab.json mmcv/model_zoo/deprecated.json mmcv/model_zoo/mmcls.json +include mmcv/ops/csrc/*.cuh mmcv/ops/csrc/*.hpp +include mmcv/ops/csrc/pytorch/*.cu mmcv/ops/csrc/pytorch/*.cpp +include mmcv/ops/csrc/parrots/*.cu mmcv/ops/csrc/parrots/*.cpp diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/PKG-INFO b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..7dee09da7c0905f6218d4d6638e9b03db35fca9c --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/PKG-INFO @@ -0,0 +1,19 @@ +Metadata-Version: 1.1 +Name: mmcv +Version: 1.2.7 +Summary: OpenMMLab Computer Vision Foundation +Home-page: https://github.com/open-mmlab/mmcv +Author: MMCV Authors +Author-email: openmmlab@gmail.com +License: UNKNOWN +Description: UNKNOWN +Keywords: computer vision +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Topic :: Utilities diff --git a/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/README.md b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a6649c0e00c4e97400a4c4c4ef095e14f5ceb1e0 --- /dev/null +++ b/PyTorch/contrib/cv/semantic_segmentation/SegFormer/mmcv-1.2.7/README.md @@ -0,0 +1,160 @@ +
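+Pre-built mmcv-full wheels are provided for the CUDA / PyTorch combinations below; "install" marks an available build:
+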
+| CUDA | torch 1.7 | torch 1.6 | torch 1.5 | torch 1.4 | torch 1.3 |
+| ---- | --------- | --------- | --------- | --------- | --------- |
+| 11.0 | install   |           |           |           |           |
+| 10.2 | install   | install   | install   |           |           |
+| 10.1 | install   | install   | install   | install   | install   |
+| 9.2  | install   | install   | install   | install   | install   |
+| cpu  | install   | install   | install   | install   | install   |
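+
+Each "install" cell in the original table expanded to a pip command. As a hedged
+sketch only (the `latest+torchX.Y.Z+cuNNN` local-version tag and the index URL
+follow the convention of contemporary mmcv releases and are assumptions, not
+verified for every cell):
+
+```bash
+# assumed example: mmcv-full wheel built for PyTorch 1.6.0 + CUDA 10.2
+pip install mmcv-full==latest+torch1.6.0+cu102 \
+    -f https://download.openmmlab.com/mmcv/dist/index.html
+```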