From fb277fbe8c6913057b321ffe4e80e030e326c13f Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Mon, 11 Dec 2023 17:33:34 +0800 Subject: [PATCH 1/7] add deeplabv3 --- .../cv/segmentation/Deeplabv3/README.md | 149 +++++++++ .../cv/segmentation/Deeplabv3/export.py | 8 + .../cv/segmentation/Deeplabv3/perf.py | 98 ++++++ .../cv/segmentation/Deeplabv3/presets.py | 110 +++++++ .../built-in/cv/segmentation/Deeplabv3/run.py | 207 ++++++++++++ .../cv/segmentation/Deeplabv3/transforms.py | 112 +++++++ .../cv/segmentation/Deeplabv3/utils.py | 300 ++++++++++++++++++ .../cv/segmentation/Deeplabv3/v2_extras.py | 83 +++++ 8 files changed, 1067 insertions(+) create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md new file mode 100644 index 0000000000..745f0d8eea --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md @@ -0,0 +1,149 @@ +# DeepLabV3模型-推理指导 + + +- [概述](#ZH-CN_TOPIC_0000001172161501) + + - [输入输出数据](#section540883920406) + +- [推理环境准备](#ZH-CN_TOPIC_0000001126281702) + +- [快速上手](#ZH-CN_TOPIC_0000001126281700) + + - [获取源码](#section4622531142816) + - [准备数据集](#section183221994411) + - [模型推理](#section741711594517) + +- [模型推理性能&精度](#ZH-CN_TOPIC_0000001172201573) + + ****** + + + + + +# 概述 + +DeeplabV3是一个经典的图像语义分割网络,在v1和v2版本基础上进行改进,多尺度(multiple scales)分割物体,设计了串行和并行的带孔卷积模块,采用多种不同的atrous rates来获取多尺度的内容信息,提出 Atrous Spatial Pyramid Pooling(ASPP)模块, 挖掘不同尺度的卷积特征,以及编码了全局内容信息的图像层特征,提升图像分割效果。 + + +- 参考实现: + + ``` + url=https://github.com/open-mmlab/mmsegmentation.git + branch=master + commit_id=fa1554f1aaea9a2c58249b06e1ea48420091464d + model_name=DeeplabV3 + ``` + + + +## 输入输出数据 + +- 输入数据 + + | 输入数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | ------------------------- | ------------ | + | input | RGB_FP32 | batchsize x 3 x 520 x 520 | NCHW | + + +- 输出数据 + + | 输出数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | -------- | ------------ | + | output1 | FLOAT32 | batchsize x 21 x 520 x 520 | NCHW | + + + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + |---------| ------- | ------------------------------------------------------------ | + | 固件与驱动 | 23.0.rc1 | [Pytorch框架推理环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) | + | CANN | 7.0.RC1.alpha003 | - | + | Python | 3.9.11 | - | + | PyTorch | 2.0.1 | - | + | Torch_AIE | 6.3.rc2 | - | + + + +# 快速上手 + +## 获取源码 + + +1. 安装依赖。 + + ``` + pip3 install -r requirements.txt + ``` + +## 准备数据集 + +1. 获取原始数据集。(解压命令参考tar –xvf \*.tar与 unzip \*.zip) + + 本模型支持coco2017数据集。用户需自行获取数据集,其目录结构如下: + + ``` + coco2017 + ├── val2017 //验证集图片信息 + └── annotations // 验证集标注信息 + ``` + + + +## 模型推理 + +1. 模型转换。 + + 使用PyTorch将模型权重文件.pth转换为.ts文件。 + + 1. 导出ts模型。 + + 1. 
使用`export.py`导出ts文件(请确保网络可连接至torch.hub)。 + + ``` + python3 export.py + ``` + + 获得`deeplabv3_resnet50.ts`文件。 + + 2. 精度测试 + + 1. 使用`run.py`执行数据集上的模型推理 + + ``` + python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.ts + ``` + + - 参数说明 + - dataset_path:数据集所在目录 + - ts_model:模型文件路径 + + 3. 性能验证 + + 1. 使用`perf.py`执行PSENet的性能测试 + + ``` + python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.ts --batch_size 1 --opt_level 1 + ``` + + - 参数说明 + - mode:使用ts模型进行推理 + - ts_path:ts模型文件所在路径 + - batch_size:batch数 + - opt_level:模型优化参数 + + + +# 模型推理性能&精度 + +调用ACL接口推理计算,性能参考下列数据。 + +| 芯片型号 | Batch Size | 数据集 | 精度 | 性能 | +| -------- | ---------- | ------ | ---- | ---- | +| 310P3 | 1 | coco2017 | mIOU=64.5% | 40FPS | diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py new file mode 100644 index 0000000000..a1a6d9203c --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py @@ -0,0 +1,8 @@ +import torch + +model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True) +model.eval() +input_data = torch.ones(1, 3, 520, 520) +ts_model = torch.jit.trace(model, input_data, strict=False) +ts_model.save("./deeplabv3_resnet50.ts") +print(f'finish save model') diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py new file mode 100644 index 0000000000..a56bf0f7d0 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py @@ -0,0 +1,98 @@ +""" +切python2.0.1环境 +""" + +import argparse +import time +from tqdm import tqdm + +import torch +import numpy as np + +import torch_aie +from torch_aie import _enums +from ais_bench.infer.interface import InferSession + + +INPUT_WIDTH = 520 +INPUT_HEIGHT = 520 + + +def parse_args(): + args = argparse.ArgumentParser(description="A program that operates in 'om' or 'ts' mode.") + args.add_argument("--mode", choices=["om", "ts"], required=True, help="Specify the mode ('om' or 'ts').") + args.add_argument('--om_path',help='MobilenetV1 om file path', type=str, + default='/onnx/mobilenetv1/mobilenet-v1_bs1.om' + ) + args.add_argument('--ts_path',help='MobilenetV1 ts file path', type=str, + default='./deeplabv3_resnet50.ts' + ) + args.add_argument("--batch_size", type=int, default=4, help="batch size.") + args.add_argument("--opt_level", type=int, default=0, help="opt level.") + return args.parse_args() + +if __name__ == '__main__': + infer_times = 100 + om_cost = 0 + pt_cost = 0 + opts = parse_args() + OM_PATH = opts.om_path + TS_PATH = opts.ts_path + BATCH_SIZE = opts.batch_size + OPTS_LEVEL = opts.opt_level + + if opts.mode == "om": + om_model = InferSession(0, OM_PATH) + for _ in tqdm(range(0, infer_times)): + dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32) + start = time.time() + output = om_model.infer([dummy_input], 'static', custom_sizes=90000000) # revise static + cost = time.time() - start + om_cost += cost + + if opts.mode == "ts": + ts_model = torch.jit.load(TS_PATH) + + input_info = [torch_aie.Input((BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT))] + + torch_aie.set_device(0) + print("start compile") + torchaie_model = torch_aie.compile( + ts_model, + inputs=input_info, + precision_policy=_enums.PrecisionPolicy.FP16, + soc_version='Ascend310P3', + optimization_level=OPTS_LEVEL + ) + print("end compile") + torchaie_model.eval() + + dummy_input = np.random.randn(BATCH_SIZE, 3, 
INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32) + input_tensor = torch.Tensor(dummy_input) + input_tensor = input_tensor.to("npu:0") + loops = 100 + warm_ctr = 10 + + default_stream = torch_aie.npu.default_stream() + time_cost = 0 + + while warm_ctr: + _ = torchaie_model(input_tensor) + default_stream.synchronize() + warm_ctr -= 1 + + for i in range(loops): + t0 = time.time() + _ = torchaie_model(input_tensor) + default_stream.synchronize() + t1 = time.time() + time_cost += (t1 - t0) + + print(f"fps: {loops} * {BATCH_SIZE} / {time_cost : .3f} samples/s") + print("torch_aie fps: ", loops * BATCH_SIZE / time_cost) + + from datetime import datetime + current_time = datetime.now() + formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S") + print("Current Time:", formatted_time) + diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py new file mode 100644 index 0000000000..20c4ca36ca --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py @@ -0,0 +1,110 @@ +import torch + + +def get_modules(use_v2): + # We need a protected import to avoid the V2 warning in case just V1 is used + if use_v2: + import torchvision.transforms.v2 + import torchvision.tv_tensors + import v2_extras + + return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras + else: + import transforms + + return transforms, None, None + + +class SegmentationPresetTrain: + def __init__( + self, + *, + base_size, + crop_size, + hflip_prob=0.5, + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + backend="pil", + use_v2=False, + ): + T, tv_tensors, v2_extras = get_modules(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tv_tensor": + transforms.append(T.ToImage()) + elif backend == "tensor": + transforms.append(T.PILToTensor()) + elif backend != "pil": + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))] + + if hflip_prob > 0: + transforms += [T.RandomHorizontalFlip(hflip_prob)] + + if use_v2: + # We need a custom pad transform here, since the padding we want to perform here is fundamentally + # different from the padding in `RandomCrop` if `pad_if_needed=True`. 
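+            # Segmentation masks are padded with the ignore value 255, every other input with 0.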
+ transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})] + + transforms += [T.RandomCrop(crop_size)] + + if backend == "pil": + transforms += [T.PILToTensor()] + + if use_v2: + img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor + transforms += [ + T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True) + ] + else: + # No need to explicitly convert masks as they're magically int64 already + transforms += [T.ToDtype(torch.float, scale=True)] + + transforms += [T.Normalize(mean=mean, std=std)] + if use_v2: + transforms += [T.ToPureTensor()] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) + + +class SegmentationPresetEval: + def __init__( + self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False + ): + T, _, _ = get_modules(use_v2) + + transforms = [] + backend = backend.lower() + if backend == "tensor": + transforms += [T.PILToTensor()] + elif backend == "tv_tensor": + transforms += [T.ToImage()] + elif backend != "pil": + raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}") + + if use_v2: + transforms += [T.Resize(size=(base_size, base_size))] + else: + # transforms += [T.RandomResize(min_size=base_size, max_size=base_size)] + transforms += [T.Resize(size=(base_size, base_size))] + + if backend == "pil": + # Note: we could just convert to pure tensors even in v2? + transforms += [T.ToImage() if use_v2 else T.PILToTensor()] + + transforms += [ + T.ToDtype(torch.float, scale=True), + T.Normalize(mean=mean, std=std), + ] + if use_v2: + transforms += [T.ToPureTensor()] + + self.transforms = T.Compose(transforms) + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py new file mode 100644 index 0000000000..fb95548c51 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py @@ -0,0 +1,207 @@ +import os +import copy +import warnings +import argparse + +import torch +import torchvision +from PIL import Image +from pycocotools import mask as coco_mask + +import utils +import presets +from transforms import Compose +import torch_aie +from torch_aie import _enums + + +NUM_CLASSES = 21 +USE_V2 = False +USE_NPU = False + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +def get_transform(is_train, args): + if is_train: + return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=USE_V2) + else: + return presets.SegmentationPresetEval(base_size=520, backend="PIL", use_v2=USE_V2) + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_valid_annotation(anno): + if len(anno) == 0: + return False + return sum(obj["area"] for obj in anno) > 1000 + + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = 
dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset + + +class FilterAndRemapCocoCategories: + def __init__(self, categories, remap=True): + self.categories = categories + self.remap = remap + + def __call__(self, image, anno): + anno = [obj for obj in anno if obj["category_id"] in self.categories] + if not self.remap: + return image, anno + anno = copy.deepcopy(anno) + for obj in anno: + obj["category_id"] = self.categories.index(obj["category_id"]) + return image, anno + + +class ConvertCocoPolysToMask: + def __call__(self, image, anno): + w, h = image.size + segmentations = [obj["segmentation"] for obj in anno] + cats = [obj["category_id"] for obj in anno] + if segmentations: + masks = convert_coco_poly_to_mask(segmentations, h, w) + cats = torch.as_tensor(cats, dtype=masks.dtype) + target, _ = (masks * cats[:, None, None]).max(dim=0) + target[masks.sum(0) > 1] = 255 + else: + target = torch.zeros((h, w), dtype=torch.uint8) + target = Image.fromarray(target.numpy()) + return image, target + + +def get_coco(root, image_set, transforms): + PATHS = { + "train": ("train2017", os.path.join("annotations", "instances_train2017.json")), + "val": ("val2017", os.path.join("annotations", "instances_val2017.json")), + } + CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72] + + img_folder, ann_file = PATHS[image_set] + img_folder = os.path.join(root, img_folder) + ann_file = os.path.join(root, ann_file) + + if USE_V2: + import v2_extras + from torchvision.datasets import wrap_dataset_for_transforms_v2 + + transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms]) + dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) + dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"}) + else: + transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms]) + dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms) + + if image_set == "train": + dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST) + + return dataset + + +def evaluate(model, data_loader, device, num_classes): + model.eval() + confmat = utils.ConfusionMatrix(num_classes) + metric_logger = utils.MetricLogger(delimiter=" ") + header = "Test:" + num_processed_samples = 0 + with torch.inference_mode(): + ctr = 0 + for image, target in metric_logger.log_every(data_loader, 100, header): + print(ctr) + ctr += 1 + image, target = image.to(device), target.to(device) + output = model(image) + output = output["out"] + + if USE_NPU: + output = output.to("cpu") + target = target.to("cpu") + + confmat.update(target.flatten(), output.argmax(1).flatten()) + num_processed_samples += image.shape[0] + print(confmat) + + confmat.reduce_from_all_processes() + + num_processed_samples = utils.reduce_across_processes(num_processed_samples) + if ( + hasattr(data_loader.dataset, "__len__") + and len(data_loader.dataset) != num_processed_samples + and torch.distributed.get_rank() == 0 + ): + warnings.warn( + f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} " + "samples were used for the validation, which might bias the results. " + "Try adjusting the batch size and / or the world size. 
" + "Setting the world size to 1 is always a safe bet." + ) + + return confmat + + +def parse_args(): + args = argparse.ArgumentParser(description="A program that operates in 'om' or 'ts' mode.") + args.add_argument('--dataset_path',help='coco2017 dataset path', type=str, + default='./coco2017' + ) # "/home/ascend/coco2017" + args.add_argument('--ts_path',help='DeeplabV3 ts path', type=str, + default='./deeplabv3_resnet50.ts' + ) + return args.parse_args() + + +def main(dataset_path, ts_path): + dataset = get_coco(root=dataset_path, image_set="val", transforms=get_transform(is_train=False, args=None)) + model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True) + + test_sampler = torch.utils.data.SequentialSampler(dataset) + data_loader_test = torch.utils.data.DataLoader( + dataset, batch_size=1, sampler=test_sampler, collate_fn=utils.collate_fn + ) + device = "cpu" + if USE_NPU: + device = "npu:0" + ts_model = torch.jit.load(ts_path) + input_info = [torch_aie.Input((1, 3, 520, 520))] + torch_aie.set_device(0) + print("start compile") + model = torch_aie.compile( + ts_model, + inputs=input_info, + precision_policy=_enums.PrecisionPolicy.FP16, + soc_version='Ascend310P3' + ) + print("end compile") + confmat = evaluate(model, data_loader_test, device=device, num_classes=NUM_CLASSES) + print(confmat) + + +if __name__ == "__main__": + args = parse_args() + dataset_path = args.dataset_path + ts_path = args.ts_path + main(dataset_path) \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py new file mode 100644 index 0000000000..ce0c59581a --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py @@ -0,0 +1,112 @@ +import random + +import numpy as np +import torch +from torchvision import transforms as T +from torchvision.transforms import functional as F + + +def pad_if_smaller(img, size, fill=0): + min_size = min(img.size) + if min_size < size: + ow, oh = img.size + padh = size - oh if oh < size else 0 + padw = size - ow if ow < size else 0 + img = F.pad(img, (0, 0, padw, padh), fill=fill) + return img + + +class Compose: + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + return image, target + + +class RandomResize: + def __init__(self, min_size, max_size=None): + self.min_size = min_size + if max_size is None: + max_size = min_size + self.max_size = max_size + + def __call__(self, image, target): + size = random.randint(self.min_size, self.max_size) + image = F.resize(image, size, antialias=True) + target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST) + return image, target + +class Resize: + def __init__(self, size): + self.size = size + + def __call__(self, image, target): + image = F.resize(image, self.size, antialias=True) + target = F.resize(target, self.size, interpolation=T.InterpolationMode.NEAREST) + return image, target + + +class RandomHorizontalFlip: + def __init__(self, flip_prob): + self.flip_prob = flip_prob + + def __call__(self, image, target): + if random.random() < self.flip_prob: + image = F.hflip(image) + target = F.hflip(target) + return image, target + + +class RandomCrop: + def __init__(self, size): + self.size = size + + def __call__(self, image, target): + image = pad_if_smaller(image, self.size) + target = pad_if_smaller(target, self.size, fill=255) + 
crop_params = T.RandomCrop.get_params(image, (self.size, self.size)) + image = F.crop(image, *crop_params) + target = F.crop(target, *crop_params) + return image, target + + +class CenterCrop: + def __init__(self, size): + self.size = size + + def __call__(self, image, target): + image = F.center_crop(image, self.size) + target = F.center_crop(target, self.size) + return image, target + + +class PILToTensor: + def __call__(self, image, target): + image = F.pil_to_tensor(image) + target = torch.as_tensor(np.array(target), dtype=torch.int64) + return image, target + + +class ToDtype: + def __init__(self, dtype, scale=False): + self.dtype = dtype + self.scale = scale + + def __call__(self, image, target): + if not self.scale: + return image.to(dtype=self.dtype), target + image = F.convert_image_dtype(image, self.dtype) + return image, target + + +class Normalize: + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, image, target): + image = F.normalize(image, mean=self.mean, std=self.std) + return image, target diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py new file mode 100644 index 0000000000..92db189985 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py @@ -0,0 +1,300 @@ +import datetime +import errno +import os +import time +from collections import defaultdict, deque + +import torch +import torch.distributed as dist + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
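+        Only count and total (and hence global_avg) are summed across processes;
+        median, avg, max and value stay local to each process.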
+ """ + t = reduce_across_processes([self.count, self.total]) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +class ConfusionMatrix: + def __init__(self, num_classes): + self.num_classes = num_classes + self.mat = None + + def update(self, a, b): + n = self.num_classes + if self.mat is None: + self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device) + with torch.inference_mode(): + k = (a >= 0) & (a < n) + inds = n * a[k].to(torch.int64) + b[k] + self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) + + def reset(self): + self.mat.zero_() + + def compute(self): + h = self.mat.float() + acc_global = torch.diag(h).sum() / h.sum() + acc = torch.diag(h) / h.sum(1) + iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) + return acc_global, acc, iu + + def reduce_from_all_processes(self): + self.mat = reduce_across_processes(self.mat).to(torch.int64) + + def __str__(self): + acc_global, acc, iu = self.compute() + return ("global correct: {:.1f}\naverage row correct: {}\nIoU: {}\nmean IoU: {:.1f}").format( + acc_global.item() * 100, + [f"{i:.1f}" for i in (acc * 100).tolist()], + [f"{i:.1f}" for i in (iu * 100).tolist()], + iu.mean().item() * 100, + ) + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + 
if i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str}") + + +def cat_list(images, fill_value=0): + max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) + batch_shape = (len(images),) + max_size + batched_imgs = images[0].new(*batch_shape).fill_(fill_value) + for img, pad_img in zip(images, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + return batched_imgs + + +def collate_fn(batch): + images, targets = list(zip(*batch)) + batched_imgs = cat_list(images, fill_value=0) + batched_targets = cat_list(targets, fill_value=255) + return batched_imgs, batched_targets + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + # elif "SLURM_PROCID" in os.environ: + # args.rank = int(os.environ["SLURM_PROCID"]) + # args.gpu = args.rank % torch.cuda.device_count() + elif hasattr(args, "rank"): + pass + else: + print("Not using distributed mode") + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +def reduce_across_processes(val): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. 
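+        # Note: the distributed branch below builds the tensor on CUDA before all_reduce, i.e. it assumes an NCCL/CUDA setup.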
+ return torch.tensor(val) + + t = torch.tensor(val, device="cuda") + dist.barrier() + dist.all_reduce(t) + return t diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py new file mode 100644 index 0000000000..e1a8b53e02 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py @@ -0,0 +1,83 @@ +"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1.""" +import torch +from torchvision import tv_tensors +from torchvision.transforms import v2 + + +class PadIfSmaller(v2.Transform): + def __init__(self, size, fill=0): + super().__init__() + self.size = size + self.fill = v2._utils._setup_fill_arg(fill) + + def _get_params(self, sample): + _, height, width = v2._utils.query_chw(sample) + padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)] + needs_padding = any(padding) + return dict(padding=padding, needs_padding=needs_padding) + + def _transform(self, inpt, params): + if not params["needs_padding"]: + return inpt + + fill = v2._utils._get_fill(self.fill, type(inpt)) + fill = v2._utils._convert_fill_arg(fill) + + return v2.functional.pad(inpt, padding=params["padding"], fill=fill) + + +class CocoDetectionToVOCSegmentation(v2.Transform): + """Turn samples from datasets.CocoDetection into the same format as VOCSegmentation. + + This is achieved in two steps: + + 1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately, + the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not + present in VOC are dropped and replaced by background. + 2. COCO only offers detection masks, i.e. a (N, H, W) bool-ish tensor, where the truthy values in each individual + mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where + the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation + mask while pixels that belong to multiple detection masks are marked as invalid. + """ + + COCO_TO_VOC_LABEL_MAP = dict( + zip( + [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72], + range(21), + ) + ) + INVALID_VALUE = 255 + + def _coco_detection_masks_to_voc_segmentation_mask(self, target): + if "masks" not in target: + return None + + instance_masks, instance_labels_coco = target["masks"], target["labels"] + + valid_labels_voc = [ + (idx, label_voc) + for idx, label_coco in enumerate(instance_labels_coco.tolist()) + if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None + ] + + if not valid_labels_voc: + return None + + valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc) + + instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8) + instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8) + + # Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as + # there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step. 
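+        # Each 0/1 instance mask is scaled by its VOC label, so the per-pixel max gives that pixel's label (0 = background).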
+ segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0) + segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE + + return segmentation_mask + + def forward(self, image, target): + segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target) + if segmentation_mask is None: + segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8) + + return image, tv_tensors.Mask(segmentation_mask) -- Gitee From 73cd59f51b14eb7a1735414558ef71e7e0f9ea20 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 12 Dec 2023 11:05:18 +0800 Subject: [PATCH 2/7] fix bug --- AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py index fb95548c51..4b77062e00 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py @@ -204,4 +204,4 @@ if __name__ == "__main__": args = parse_args() dataset_path = args.dataset_path ts_path = args.ts_path - main(dataset_path) \ No newline at end of file + main(dataset_path, ts_path) \ No newline at end of file -- Gitee From a27afc3d955477fd2b5a8ca41b364a996279f316 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 12 Dec 2023 15:08:08 +0800 Subject: [PATCH 3/7] 1 --- AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md index 745f0d8eea..19833d6226 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md @@ -146,4 +146,4 @@ DeeplabV3是一个经典的图像语义分割网络,在v1和v2版本基础上 | 芯片型号 | Batch Size | 数据集 | 精度 | 性能 | | -------- | ---------- | ------ | ---- | ---- | -| 310P3 | 1 | coco2017 | mIOU=64.5% | 40FPS | +| 310P3 | 1 | coco2017 | mIOU=64.5% | 30FPS | -- Gitee From 58f735ae89f4ef95cab4c5926b97c6d65c7b4699 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 19 Dec 2023 10:15:39 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=94=B9=E5=9B=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/segmentation/Deeplabv3/README.md | 16 ++- .../cv/segmentation/Deeplabv3/deeplabv3.py | 103 ++++++++++++++++++ .../cv/segmentation/Deeplabv3/perf.py | 2 +- .../built-in/cv/segmentation/Deeplabv3/run.py | 1 - 4 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md index 19833d6226..d4f586ca73 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md @@ -112,24 +112,30 @@ DeeplabV3是一个经典的图像语义分割网络,在v1和v2版本基础上 获得`deeplabv3_resnet50.ts`文件。 - 2. 精度测试 + 2. 改图 + + 1. 将刚获得的ts文件解压,`unzip deeplabv3_resnet50.ts`,获得`deeplabv3_resnet50`文件夹 + 2. 将`deeplabv3_resnet50/code/__torch__/torchvision/models/segmentation/deeplabv3.py`替换为本项目提供的`deeplabv3.py` + 3. 完成替换后,重新压缩:`zip -r deeplabv3_resnet50.zip deeplabv3_resnet50` + + 3. 精度测试 1. 
使用`run.py`执行数据集上的模型推理 ``` - python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.ts + python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.zip ``` - 参数说明 - dataset_path:数据集所在目录 - ts_model:模型文件路径 - 3. 性能验证 + 4. 性能验证 1. 使用`perf.py`执行PSENet的性能测试 ``` - python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.ts --batch_size 1 --opt_level 1 + python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.zip --batch_size 1 --opt_level 1 ``` - 参数说明 @@ -146,4 +152,4 @@ DeeplabV3是一个经典的图像语义分割网络,在v1和v2版本基础上 | 芯片型号 | Batch Size | 数据集 | 精度 | 性能 | | -------- | ---------- | ------ | ---- | ---- | -| 310P3 | 1 | coco2017 | mIOU=64.5% | 30FPS | +| 310P3 | 1 | coco2017 | mIOU=64.5% | 66FPS | diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py new file mode 100644 index 0000000000..16fb2c9253 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py @@ -0,0 +1,103 @@ +class DeepLabV3(Module): + __parameters__ = [] + __buffers__ = [] + training : bool + _is_full_backward_hook : Optional[bool] + backbone : __torch__.torchvision.models._utils.IntermediateLayerGetter + classifier : __torch__.torchvision.models.segmentation.deeplabv3.DeepLabHead + aux_classifier : __torch__.torchvision.models.segmentation.fcn.FCNHead + def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.DeepLabV3, + x: Tensor): + aux_classifier = self.aux_classifier + classifier = self.classifier + backbone = self.backbone + _0 = ops.prim.NumToTensor(torch.size(x, 2)) + _1 = int(_0) + _2 = int(_0) + _3 = ops.prim.NumToTensor(torch.size(x, 3)) + _4 = int(_3) + _5 = int(_3) + _6, _7, = (backbone).forward(x, ) + _8 = torch.upsample_bilinear2d((classifier).forward(_6, ), [_2, _5], False, None) + _9 = torch.upsample_bilinear2d((aux_classifier).forward(_7, ), [_1, _4], False, None) + return _8 +class DeepLabHead(Module): + __parameters__ = [] + __buffers__ = [] + training : bool + _is_full_backward_hook : Optional[bool] + __annotations__["0"] = __torch__.torchvision.models.segmentation.deeplabv3.ASPP + __annotations__["1"] = __torch__.torch.nn.modules.conv.___torch_mangle_164.Conv2d + __annotations__["2"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_165.BatchNorm2d + __annotations__["3"] = __torch__.torch.nn.modules.activation.___torch_mangle_166.ReLU + __annotations__["4"] = __torch__.torch.nn.modules.conv.___torch_mangle_167.Conv2d + def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.DeepLabHead, + argument_1: Tensor) -> Tensor: + _4 = getattr(self, "4") + _3 = getattr(self, "3") + _2 = getattr(self, "2") + _1 = getattr(self, "1") + _0 = getattr(self, "0") + _10 = (_1).forward((_0).forward(argument_1, ), ) + _11 = (_4).forward((_3).forward((_2).forward(_10, ), ), ) + return _11 +class ASPP(Module): + __parameters__ = [] + __buffers__ = [] + training : bool + _is_full_backward_hook : Optional[bool] + convs : __torch__.torch.nn.modules.container.ModuleList + project : __torch__.torch.nn.modules.container.___torch_mangle_163.Sequential + def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPP, + argument_1: Tensor) -> Tensor: + project = self.project + convs = self.convs + _4 = getattr(convs, "4") + convs0 = self.convs + _3 = getattr(convs0, "3") + convs1 = self.convs + _2 = getattr(convs1, "2") + convs2 = self.convs + _1 = getattr(convs2, "1") + convs3 = self.convs + _0 = getattr(convs3, "0") + _12 = [(_0).forward(argument_1, ), 
(_1).forward(argument_1, ), (_2).forward(argument_1, ), (_3).forward(argument_1, ), (_4).forward(argument_1, )] + input = torch.cat(_12, 1) + return (project).forward(input, ) +class ASPPConv(Module): + __parameters__ = [] + __buffers__ = [] + training : bool + _is_full_backward_hook : Optional[bool] + __annotations__["0"] = __torch__.torch.nn.modules.conv.___torch_mangle_146.Conv2d + __annotations__["1"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_147.BatchNorm2d + __annotations__["2"] = __torch__.torch.nn.modules.activation.___torch_mangle_148.ReLU + def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPPConv, + argument_1: Tensor) -> Tensor: + _2 = getattr(self, "2") + _1 = getattr(self, "1") + _0 = getattr(self, "0") + _13 = (_1).forward((_0).forward(argument_1, ), ) + return (_2).forward(_13, ) +class ASPPPooling(Module): + __parameters__ = [] + __buffers__ = [] + training : bool + _is_full_backward_hook : Optional[bool] + __annotations__["0"] = __torch__.torch.nn.modules.pooling.AdaptiveAvgPool2d + __annotations__["1"] = __torch__.torch.nn.modules.conv.___torch_mangle_157.Conv2d + __annotations__["2"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_158.BatchNorm2d + __annotations__["3"] = __torch__.torch.nn.modules.activation.___torch_mangle_159.ReLU + def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPPPooling, + argument_1: Tensor) -> Tensor: + _3 = getattr(self, "3") + _2 = getattr(self, "2") + _1 = getattr(self, "1") + _0 = getattr(self, "0") + _14 = ops.prim.NumToTensor(torch.size(argument_1, 2)) + _15 = int(_14) + _16 = ops.prim.NumToTensor(torch.size(argument_1, 3)) + _17 = int(_16) + _18 = (_1).forward((_0).forward(argument_1, ), ) + _19 = torch.upsample_bilinear2d((_3).forward((_2).forward(_18, ), ), [_15, _17], False, None) + return _19 diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py index a56bf0f7d0..406d8787d2 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py @@ -25,7 +25,7 @@ def parse_args(): default='/onnx/mobilenetv1/mobilenet-v1_bs1.om' ) args.add_argument('--ts_path',help='MobilenetV1 ts file path', type=str, - default='./deeplabv3_resnet50.ts' + default='./deeplabv3_resnet50.zip' ) args.add_argument("--batch_size", type=int, default=4, help="batch size.") args.add_argument("--opt_level", type=int, default=0, help="opt level.") diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py index 4b77062e00..f045f08470 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py @@ -135,7 +135,6 @@ def evaluate(model, data_loader, device, num_classes): ctr += 1 image, target = image.to(device), target.to(device) output = model(image) - output = output["out"] if USE_NPU: output = output.to("cpu") -- Gitee From 7992787696acb396710c9cf5dea86306638934bd Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 19 Dec 2023 11:09:56 +0800 Subject: [PATCH 5/7] 1 --- AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py index f045f08470..0da64732e0 100644 --- 
a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py @@ -17,7 +17,7 @@ from torch_aie import _enums NUM_CLASSES = 21 USE_V2 = False -USE_NPU = False +USE_NPU = True def convert_coco_poly_to_mask(segmentations, height, width): -- Gitee From 05186fb1cb215a43e7a652e60182efb23760aa91 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 19 Dec 2023 11:13:42 +0800 Subject: [PATCH 6/7] 1 --- AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py index 0da64732e0..c4bf9e8b90 100644 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py +++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py @@ -168,7 +168,7 @@ def parse_args(): default='./coco2017' ) # "/home/ascend/coco2017" args.add_argument('--ts_path',help='DeeplabV3 ts path', type=str, - default='./deeplabv3_resnet50.ts' + default='./deeplabv3_resnet50.zip' ) return args.parse_args() -- Gitee From 4bb4824555b14b604a1c57f15890fe9e459b5bb5 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 19 Dec 2023 15:33:31 +0800 Subject: [PATCH 7/7] 1 --- .../cv/segmentation/Deeplabv3/README.md | 155 ------------------ .../cv/segmentation/Deeplabv3/deeplabv3.py | 103 ------------ 2 files changed, 258 deletions(-) delete mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md delete mode 100644 AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md deleted file mode 100644 index d4f586ca73..0000000000 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# DeepLabV3模型-推理指导 - - -- [概述](#ZH-CN_TOPIC_0000001172161501) - - - [输入输出数据](#section540883920406) - -- [推理环境准备](#ZH-CN_TOPIC_0000001126281702) - -- [快速上手](#ZH-CN_TOPIC_0000001126281700) - - - [获取源码](#section4622531142816) - - [准备数据集](#section183221994411) - - [模型推理](#section741711594517) - -- [模型推理性能&精度](#ZH-CN_TOPIC_0000001172201573) - - ****** - - - - - -# 概述 - -DeeplabV3是一个经典的图像语义分割网络,在v1和v2版本基础上进行改进,多尺度(multiple scales)分割物体,设计了串行和并行的带孔卷积模块,采用多种不同的atrous rates来获取多尺度的内容信息,提出 Atrous Spatial Pyramid Pooling(ASPP)模块, 挖掘不同尺度的卷积特征,以及编码了全局内容信息的图像层特征,提升图像分割效果。 - - -- 参考实现: - - ``` - url=https://github.com/open-mmlab/mmsegmentation.git - branch=master - commit_id=fa1554f1aaea9a2c58249b06e1ea48420091464d - model_name=DeeplabV3 - ``` - - - -## 输入输出数据 - -- 输入数据 - - | 输入数据 | 数据类型 | 大小 | 数据排布格式 | - | -------- | -------- | ------------------------- | ------------ | - | input | RGB_FP32 | batchsize x 3 x 520 x 520 | NCHW | - - -- 输出数据 - - | 输出数据 | 数据类型 | 大小 | 数据排布格式 | - | -------- | -------- | -------- | ------------ | - | output1 | FLOAT32 | batchsize x 21 x 520 x 520 | NCHW | - - - - -# 推理环境准备 - -- 该模型需要以下插件与驱动 - - **表 1** 版本配套表 - - | 配套 | 版本 | 环境准备指导 | - |---------| ------- | ------------------------------------------------------------ | - | 固件与驱动 | 23.0.rc1 | [Pytorch框架推理环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) | - | CANN | 7.0.RC1.alpha003 | - | - | Python | 3.9.11 | - | - | PyTorch | 2.0.1 | - | - | Torch_AIE | 6.3.rc2 | - | - - - -# 快速上手 - -## 获取源码 - - -1. 安装依赖。 - - ``` - pip3 install -r requirements.txt - ``` - -## 准备数据集 - -1. 
获取原始数据集。(解压命令参考tar –xvf \*.tar与 unzip \*.zip) - - 本模型支持coco2017数据集。用户需自行获取数据集,其目录结构如下: - - ``` - coco2017 - ├── val2017 //验证集图片信息 - └── annotations // 验证集标注信息 - ``` - - - -## 模型推理 - -1. 模型转换。 - - 使用PyTorch将模型权重文件.pth转换为.ts文件。 - - 1. 导出ts模型。 - - 1. 使用`export.py`导出ts文件(请确保网络可连接至torch.hub)。 - - ``` - python3 export.py - ``` - - 获得`deeplabv3_resnet50.ts`文件。 - - 2. 改图 - - 1. 将刚获得的ts文件解压,`unzip deeplabv3_resnet50.ts`,获得`deeplabv3_resnet50`文件夹 - 2. 将`deeplabv3_resnet50/code/__torch__/torchvision/models/segmentation/deeplabv3.py`替换为本项目提供的`deeplabv3.py` - 3. 完成替换后,重新压缩:`zip -r deeplabv3_resnet50.zip deeplabv3_resnet50` - - 3. 精度测试 - - 1. 使用`run.py`执行数据集上的模型推理 - - ``` - python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.zip - ``` - - - 参数说明 - - dataset_path:数据集所在目录 - - ts_model:模型文件路径 - - 4. 性能验证 - - 1. 使用`perf.py`执行PSENet的性能测试 - - ``` - python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.zip --batch_size 1 --opt_level 1 - ``` - - - 参数说明 - - mode:使用ts模型进行推理 - - ts_path:ts模型文件所在路径 - - batch_size:batch数 - - opt_level:模型优化参数 - - - -# 模型推理性能&精度 - -调用ACL接口推理计算,性能参考下列数据。 - -| 芯片型号 | Batch Size | 数据集 | 精度 | 性能 | -| -------- | ---------- | ------ | ---- | ---- | -| 310P3 | 1 | coco2017 | mIOU=64.5% | 66FPS | diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py deleted file mode 100644 index 16fb2c9253..0000000000 --- a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/deeplabv3.py +++ /dev/null @@ -1,103 +0,0 @@ -class DeepLabV3(Module): - __parameters__ = [] - __buffers__ = [] - training : bool - _is_full_backward_hook : Optional[bool] - backbone : __torch__.torchvision.models._utils.IntermediateLayerGetter - classifier : __torch__.torchvision.models.segmentation.deeplabv3.DeepLabHead - aux_classifier : __torch__.torchvision.models.segmentation.fcn.FCNHead - def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.DeepLabV3, - x: Tensor): - aux_classifier = self.aux_classifier - classifier = self.classifier - backbone = self.backbone - _0 = ops.prim.NumToTensor(torch.size(x, 2)) - _1 = int(_0) - _2 = int(_0) - _3 = ops.prim.NumToTensor(torch.size(x, 3)) - _4 = int(_3) - _5 = int(_3) - _6, _7, = (backbone).forward(x, ) - _8 = torch.upsample_bilinear2d((classifier).forward(_6, ), [_2, _5], False, None) - _9 = torch.upsample_bilinear2d((aux_classifier).forward(_7, ), [_1, _4], False, None) - return _8 -class DeepLabHead(Module): - __parameters__ = [] - __buffers__ = [] - training : bool - _is_full_backward_hook : Optional[bool] - __annotations__["0"] = __torch__.torchvision.models.segmentation.deeplabv3.ASPP - __annotations__["1"] = __torch__.torch.nn.modules.conv.___torch_mangle_164.Conv2d - __annotations__["2"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_165.BatchNorm2d - __annotations__["3"] = __torch__.torch.nn.modules.activation.___torch_mangle_166.ReLU - __annotations__["4"] = __torch__.torch.nn.modules.conv.___torch_mangle_167.Conv2d - def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.DeepLabHead, - argument_1: Tensor) -> Tensor: - _4 = getattr(self, "4") - _3 = getattr(self, "3") - _2 = getattr(self, "2") - _1 = getattr(self, "1") - _0 = getattr(self, "0") - _10 = (_1).forward((_0).forward(argument_1, ), ) - _11 = (_4).forward((_3).forward((_2).forward(_10, ), ), ) - return _11 -class ASPP(Module): - __parameters__ = [] - __buffers__ = [] - training : bool - _is_full_backward_hook : Optional[bool] - convs : 
__torch__.torch.nn.modules.container.ModuleList - project : __torch__.torch.nn.modules.container.___torch_mangle_163.Sequential - def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPP, - argument_1: Tensor) -> Tensor: - project = self.project - convs = self.convs - _4 = getattr(convs, "4") - convs0 = self.convs - _3 = getattr(convs0, "3") - convs1 = self.convs - _2 = getattr(convs1, "2") - convs2 = self.convs - _1 = getattr(convs2, "1") - convs3 = self.convs - _0 = getattr(convs3, "0") - _12 = [(_0).forward(argument_1, ), (_1).forward(argument_1, ), (_2).forward(argument_1, ), (_3).forward(argument_1, ), (_4).forward(argument_1, )] - input = torch.cat(_12, 1) - return (project).forward(input, ) -class ASPPConv(Module): - __parameters__ = [] - __buffers__ = [] - training : bool - _is_full_backward_hook : Optional[bool] - __annotations__["0"] = __torch__.torch.nn.modules.conv.___torch_mangle_146.Conv2d - __annotations__["1"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_147.BatchNorm2d - __annotations__["2"] = __torch__.torch.nn.modules.activation.___torch_mangle_148.ReLU - def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPPConv, - argument_1: Tensor) -> Tensor: - _2 = getattr(self, "2") - _1 = getattr(self, "1") - _0 = getattr(self, "0") - _13 = (_1).forward((_0).forward(argument_1, ), ) - return (_2).forward(_13, ) -class ASPPPooling(Module): - __parameters__ = [] - __buffers__ = [] - training : bool - _is_full_backward_hook : Optional[bool] - __annotations__["0"] = __torch__.torch.nn.modules.pooling.AdaptiveAvgPool2d - __annotations__["1"] = __torch__.torch.nn.modules.conv.___torch_mangle_157.Conv2d - __annotations__["2"] = __torch__.torch.nn.modules.batchnorm.___torch_mangle_158.BatchNorm2d - __annotations__["3"] = __torch__.torch.nn.modules.activation.___torch_mangle_159.ReLU - def forward(self: __torch__.torchvision.models.segmentation.deeplabv3.ASPPPooling, - argument_1: Tensor) -> Tensor: - _3 = getattr(self, "3") - _2 = getattr(self, "2") - _1 = getattr(self, "1") - _0 = getattr(self, "0") - _14 = ops.prim.NumToTensor(torch.size(argument_1, 2)) - _15 = int(_14) - _16 = ops.prim.NumToTensor(torch.size(argument_1, 3)) - _17 = int(_16) - _18 = (_1).forward((_0).forward(argument_1, ), ) - _19 = torch.upsample_bilinear2d((_3).forward((_2).forward(_18, ), ), [_15, _17], False, None) - return _19 -- Gitee