diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1a6d9203c5b65aa0c11eee0d157033480f6ce71
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py
@@ -0,0 +1,8 @@
+import torch
+
+model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
+model.eval()
+input_data = torch.ones(1, 3, 520, 520)
+ts_model = torch.jit.trace(model, input_data, strict=False)
+ts_model.save("./deeplabv3_resnet50.ts")
+print('finished saving model')
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..406d8787d2fb3f30f427624b0a1ea07f6a4fbac0
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py
@@ -0,0 +1,98 @@
+"""
+Switch to the Python environment with torch 2.0.1 before running.
+"""
+
+import argparse
+import time
+from tqdm import tqdm
+
+import torch
+import numpy as np
+
+import torch_aie
+from torch_aie import _enums
+from ais_bench.infer.interface import InferSession
+
+
+INPUT_WIDTH = 520
+INPUT_HEIGHT = 520
+
+
+def parse_args():
+    args = argparse.ArgumentParser(description="A program that operates in 'om' or 'ts' mode.")
+    args.add_argument("--mode", choices=["om", "ts"], required=True, help="Specify the mode ('om' or 'ts').")
+    args.add_argument('--om_path', help='Deeplabv3 om file path', type=str,
+        default='/onnx/mobilenetv1/mobilenet-v1_bs1.om'
+    )
+    args.add_argument('--ts_path', help='Deeplabv3 ts file path', type=str,
+        default='./deeplabv3_resnet50.ts'
+    )
+    args.add_argument("--batch_size", type=int, default=4, help="batch size.")
+    args.add_argument("--opt_level", type=int, default=0, help="opt level.")
+    return args.parse_args()
+
+if __name__ == '__main__':
+    infer_times = 100
+    om_cost = 0
+    pt_cost = 0
+    opts = parse_args()
+    OM_PATH = opts.om_path
+    TS_PATH = opts.ts_path
+    BATCH_SIZE = opts.batch_size
+    OPTS_LEVEL = opts.opt_level
+
+    if opts.mode == "om":
+        om_model = InferSession(0, OM_PATH)
+        for _ in tqdm(range(0, infer_times)):
+            dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32)
+            start = time.time()
+            output = om_model.infer([dummy_input], 'static', custom_sizes=90000000)  # revise static
+            cost = time.time() - start
+            om_cost += cost
+
+    if opts.mode == "ts":
+        ts_model = torch.jit.load(TS_PATH)
+
+        input_info = [torch_aie.Input((BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT))]
+
+        torch_aie.set_device(0)
+        print("start compile")
+        torchaie_model = torch_aie.compile(
+            ts_model,
+            inputs=input_info,
+            precision_policy=_enums.PrecisionPolicy.FP16,
+            soc_version='Ascend310P3',
+            optimization_level=OPTS_LEVEL
+        )
+        print("end compile")
+        torchaie_model.eval()
+
+        dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32)
+        input_tensor = torch.Tensor(dummy_input)
+        input_tensor = input_tensor.to("npu:0")
+        loops = 100
+        warm_ctr = 10
+
+        default_stream = torch_aie.npu.default_stream()
+        time_cost = 0
+
+        while warm_ctr:
+            _ = torchaie_model(input_tensor)
+            default_stream.synchronize()
+            warm_ctr -= 1
+
+        for i in range(loops):
+            t0 = time.time()
+            _ = torchaie_model(input_tensor)
+            default_stream.synchronize()
+            t1 = time.time()
+            time_cost += (t1 - t0)
+
+        print(f"fps: {loops} * {BATCH_SIZE} / {time_cost : .3f} samples/s")
+        print("torch_aie fps: ", loops * BATCH_SIZE / time_cost)
+
+        from datetime import datetime
+        current_time = datetime.now()
+        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+        print("Current Time:", formatted_time)
+
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py
new file mode 100644
index 0000000000000000000000000000000000000000..20c4ca36ca562156d87db88e9a0ea503db2b0520
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py
@@ -0,0 +1,110 @@
+import torch
+
+
+def get_modules(use_v2):
+    # We need a protected import to avoid the V2 warning in case just V1 is used
+    if use_v2:
+        import torchvision.transforms.v2
+        import torchvision.tv_tensors
+        import v2_extras
+
+        return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras
+    else:
+        import transforms
+
+        return transforms, None, None
+
+
+class SegmentationPresetTrain:
+    def __init__(
+        self,
+        *,
+        base_size,
+        crop_size,
+        hflip_prob=0.5,
+        mean=(0.485, 0.456, 0.406),
+        std=(0.229, 0.224, 0.225),
+        backend="pil",
+        use_v2=False,
+    ):
+        T, tv_tensors, v2_extras = get_modules(use_v2)
+
+        transforms = []
+        backend = backend.lower()
+        if backend == "tv_tensor":
+            transforms.append(T.ToImage())
+        elif backend == "tensor":
+            transforms.append(T.PILToTensor())
+        elif backend != "pil":
+            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
+
+        transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))]
+
+        if hflip_prob > 0:
+            transforms += [T.RandomHorizontalFlip(hflip_prob)]
+
+        if use_v2:
+            # We need a custom pad transform here, since the padding we want to perform here is fundamentally
+            # different from the padding in `RandomCrop` if `pad_if_needed=True`.
+            transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})]
+
+        transforms += [T.RandomCrop(crop_size)]
+
+        if backend == "pil":
+            transforms += [T.PILToTensor()]
+
+        if use_v2:
+            img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor
+            transforms += [
+                T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True)
+            ]
+        else:
+            # No need to explicitly convert masks as they're magically int64 already
+            transforms += [T.ToDtype(torch.float, scale=True)]
+
+        transforms += [T.Normalize(mean=mean, std=std)]
+        if use_v2:
+            transforms += [T.ToPureTensor()]
+
+        self.transforms = T.Compose(transforms)
+
+    def __call__(self, img, target):
+        return self.transforms(img, target)
+
+
+class SegmentationPresetEval:
+    def __init__(
+        self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False
+    ):
+        T, _, _ = get_modules(use_v2)
+
+        transforms = []
+        backend = backend.lower()
+        if backend == "tensor":
+            transforms += [T.PILToTensor()]
+        elif backend == "tv_tensor":
+            transforms += [T.ToImage()]
+        elif backend != "pil":
+            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
+
+        if use_v2:
+            transforms += [T.Resize(size=(base_size, base_size))]
+        else:
+            # transforms += [T.RandomResize(min_size=base_size, max_size=base_size)]
+            transforms += [T.Resize(size=(base_size, base_size))]
+
+        if backend == "pil":
+            # Note: we could just convert to pure tensors even in v2?
+            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
+
+        transforms += [
+            T.ToDtype(torch.float, scale=True),
+            T.Normalize(mean=mean, std=std),
+        ]
+        if use_v2:
+            transforms += [T.ToPureTensor()]
+
+        self.transforms = T.Compose(transforms)
+
+    def __call__(self, img, target):
+        return self.transforms(img, target)
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4bf9e8b90b8e4f9abb9aa5f0c7187555bb24217
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py
@@ -0,0 +1,206 @@
+import os
+import copy
+import warnings
+import argparse
+
+import torch
+import torchvision
+from PIL import Image
+from pycocotools import mask as coco_mask
+
+import utils
+import presets
+from transforms import Compose
+import torch_aie
+from torch_aie import _enums
+
+
+NUM_CLASSES = 21
+USE_V2 = False
+USE_NPU = True
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
+    masks = []
+    for polygons in segmentations:
+        rles = coco_mask.frPyObjects(polygons, height, width)
+        mask = coco_mask.decode(rles)
+        if len(mask.shape) < 3:
+            mask = mask[..., None]
+        mask = torch.as_tensor(mask, dtype=torch.uint8)
+        mask = mask.any(dim=2)
+        masks.append(mask)
+    if masks:
+        masks = torch.stack(masks, dim=0)
+    else:
+        masks = torch.zeros((0, height, width), dtype=torch.uint8)
+    return masks
+
+
+def get_transform(is_train, args):
+    if is_train:
+        return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=USE_V2)
+    else:
+        return presets.SegmentationPresetEval(base_size=520, backend="PIL", use_v2=USE_V2)
+
+
+def _coco_remove_images_without_annotations(dataset, cat_list=None):
+    def _has_valid_annotation(anno):
+        if len(anno) == 0:
+            return False
+        return sum(obj["area"] for obj in anno) > 1000
+
+    ids = []
+    for ds_idx, img_id in enumerate(dataset.ids):
+        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
+        anno = dataset.coco.loadAnns(ann_ids)
+        if cat_list:
+            anno = [obj for obj in anno if obj["category_id"] in cat_list]
+        if _has_valid_annotation(anno):
+            ids.append(ds_idx)
+
+    dataset = torch.utils.data.Subset(dataset, ids)
+    return dataset
+
+
+class FilterAndRemapCocoCategories:
+    def __init__(self, categories, remap=True):
+        self.categories = categories
+        self.remap = remap
+
+    def __call__(self, image, anno):
+        anno = [obj for obj in anno if obj["category_id"] in self.categories]
+        if not self.remap:
+            return image, anno
+        anno = copy.deepcopy(anno)
+        for obj in anno:
+            obj["category_id"] = self.categories.index(obj["category_id"])
+        return image, anno
+
+
+class ConvertCocoPolysToMask:
+    def __call__(self, image, anno):
+        w, h = image.size
+        segmentations = [obj["segmentation"] for obj in anno]
+        cats = [obj["category_id"] for obj in anno]
+        if segmentations:
+            masks = convert_coco_poly_to_mask(segmentations, h, w)
+            cats = torch.as_tensor(cats, dtype=masks.dtype)
+            target, _ = (masks * cats[:, None, None]).max(dim=0)
+            target[masks.sum(0) > 1] = 255
+        else:
+            target = torch.zeros((h, w), dtype=torch.uint8)
+        target = Image.fromarray(target.numpy())
+        return image, target
+
+
+def get_coco(root, image_set, transforms):
+    PATHS = {
+        "train": ("train2017", os.path.join("annotations", "instances_train2017.json")),
+        "val": ("val2017", os.path.join("annotations", "instances_val2017.json")),
+    }
+    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72]
+
+    img_folder, ann_file = PATHS[image_set]
+    img_folder = os.path.join(root, img_folder)
+    ann_file = os.path.join(root, ann_file)
+
+    if USE_V2:
+        import v2_extras
+        from torchvision.datasets import wrap_dataset_for_transforms_v2
+
+        transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms])
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+        dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"})
+    else:
+        transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms])
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+
+    if image_set == "train":
+        dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST)
+
+    return dataset
+
+
+def evaluate(model, data_loader, device, num_classes):
+    model.eval()
+    confmat = utils.ConfusionMatrix(num_classes)
+    metric_logger = utils.MetricLogger(delimiter="  ")
+    header = "Test:"
+    num_processed_samples = 0
+    with torch.inference_mode():
+        ctr = 0
+        for image, target in metric_logger.log_every(data_loader, 100, header):
+            print(ctr)
+            ctr += 1
+            image, target = image.to(device), target.to(device)
+            output = model(image)
+
+            if USE_NPU:
+                output = output.to("cpu")
+                target = target.to("cpu")
+
+            confmat.update(target.flatten(), output.argmax(1).flatten())
+            num_processed_samples += image.shape[0]
+            print(confmat)
+
+        confmat.reduce_from_all_processes()
+
+    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
+    if (
+        hasattr(data_loader.dataset, "__len__")
+        and len(data_loader.dataset) != num_processed_samples
+        and torch.distributed.get_rank() == 0
+    ):
+        warnings.warn(
+            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
+            "samples were used for the validation, which might bias the results. "
+            "Try adjusting the batch size and / or the world size. "
+            "Setting the world size to 1 is always a safe bet."
+        )
+
+    return confmat
+
+
+def parse_args():
+    args = argparse.ArgumentParser(description="DeeplabV3 accuracy evaluation on the COCO 2017 validation set.")
+    args.add_argument('--dataset_path', help='coco2017 dataset path', type=str,
+        default='./coco2017'
+    )  # "/home/ascend/coco2017"
+    args.add_argument('--ts_path', help='DeeplabV3 ts path', type=str,
+        default='./deeplabv3_resnet50.ts'
+    )
+    return args.parse_args()
+
+
+def main(dataset_path, ts_path):
+    dataset = get_coco(root=dataset_path, image_set="val", transforms=get_transform(is_train=False, args=None))
+    model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
+
+    test_sampler = torch.utils.data.SequentialSampler(dataset)
+    data_loader_test = torch.utils.data.DataLoader(
+        dataset, batch_size=1, sampler=test_sampler, collate_fn=utils.collate_fn
+    )
+    device = "cpu"
+    if USE_NPU:
+        device = "npu:0"
+    ts_model = torch.jit.load(ts_path)
+    input_info = [torch_aie.Input((1, 3, 520, 520))]
+    torch_aie.set_device(0)
+    print("start compile")
+    model = torch_aie.compile(
+        ts_model,
+        inputs=input_info,
+        precision_policy=_enums.PrecisionPolicy.FP16,
+        soc_version='Ascend310P3'
+    )
+    print("end compile")
+    confmat = evaluate(model, data_loader_test, device=device, num_classes=NUM_CLASSES)
+    print(confmat)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    dataset_path = args.dataset_path
+    ts_path = args.ts_path
+    main(dataset_path, ts_path)
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce0c59581acf328aaf35ff94bb3ff4e699eb650c
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py
@@ -0,0 +1,112 @@
+import random
+
+import numpy as np
+import torch
+from torchvision import transforms as T
+from torchvision.transforms import functional as F
+
+
+def pad_if_smaller(img, size, fill=0):
+    min_size = min(img.size)
+    if min_size < size:
+        ow, oh = img.size
+        padh = size - oh if oh < size else 0
+        padw = size - ow if ow < size else 0
+        img = F.pad(img, (0, 0, padw, padh), fill=fill)
+    return img
+
+
+class Compose:
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, image, target):
+        for t in self.transforms:
+            image, target = t(image, target)
+        return image, target
+
+
+class RandomResize:
+    def __init__(self, min_size, max_size=None):
+        self.min_size = min_size
+        if max_size is None:
+            max_size = min_size
+        self.max_size = max_size
+
+    def __call__(self, image, target):
+        size = random.randint(self.min_size, self.max_size)
+        image = F.resize(image, size, antialias=True)
+        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)
+        return image, target
+
+class Resize:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = F.resize(image, self.size, antialias=True)
+        target = F.resize(target, self.size, interpolation=T.InterpolationMode.NEAREST)
+        return image, target
+
+
+class RandomHorizontalFlip:
+    def __init__(self, flip_prob):
+        self.flip_prob = flip_prob
+
+    def __call__(self, image, target):
+        if random.random() < self.flip_prob:
+            image = F.hflip(image)
+            target = F.hflip(target)
+        return image, target
+
+
+class RandomCrop:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = pad_if_smaller(image, self.size)
+        target = pad_if_smaller(target, self.size, fill=255)
+        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))
+        image = F.crop(image, *crop_params)
+        target = F.crop(target, *crop_params)
+        return image, target
+
+
+class CenterCrop:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = F.center_crop(image, self.size)
+        target = F.center_crop(target, self.size)
+        return image, target
+
+
+class PILToTensor:
+    def __call__(self, image, target):
+        image = F.pil_to_tensor(image)
+        target = torch.as_tensor(np.array(target), dtype=torch.int64)
+        return image, target
+
+
+class ToDtype:
+    def __init__(self, dtype, scale=False):
+        self.dtype = dtype
+        self.scale = scale
+
+    def __call__(self, image, target):
+        if not self.scale:
+            return image.to(dtype=self.dtype), target
+        image = F.convert_image_dtype(image, self.dtype)
+        return image, target
+
+
+class Normalize:
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, image, target):
+        image = F.normalize(image, mean=self.mean, std=self.std)
+        return image, target
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..92db18998511d7cd560de74cd8a60966d2960899
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py
@@ -0,0 +1,300 @@
+import datetime
+import errno
+import os
+import time
+from collections import defaultdict, deque
+
+import torch
+import torch.distributed as dist
+
+
+class SmoothedValue:
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+
+    def __init__(self, window_size=20, fmt=None):
+        if fmt is None:
+            fmt = "{median:.4f} ({global_avg:.4f})"
+        self.deque = deque(maxlen=window_size)
+        self.total = 0.0
+        self.count = 0
+        self.fmt = fmt
+
+    def update(self, value, n=1):
+        self.deque.append(value)
+        self.count += n
+        self.total += value * n
+
+    def synchronize_between_processes(self):
+        """
+        Warning: does not synchronize the deque!
+ """ + t = reduce_across_processes([self.count, self.total]) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +class ConfusionMatrix: + def __init__(self, num_classes): + self.num_classes = num_classes + self.mat = None + + def update(self, a, b): + n = self.num_classes + if self.mat is None: + self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device) + with torch.inference_mode(): + k = (a >= 0) & (a < n) + inds = n * a[k].to(torch.int64) + b[k] + self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) + + def reset(self): + self.mat.zero_() + + def compute(self): + h = self.mat.float() + acc_global = torch.diag(h).sum() / h.sum() + acc = torch.diag(h) / h.sum(1) + iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) + return acc_global, acc, iu + + def reduce_from_all_processes(self): + self.mat = reduce_across_processes(self.mat).to(torch.int64) + + def __str__(self): + acc_global, acc, iu = self.compute() + return ("global correct: {:.1f}\naverage row correct: {}\nIoU: {}\nmean IoU: {:.1f}").format( + acc_global.item() * 100, + [f"{i:.1f}" for i in (acc * 100).tolist()], + [f"{i:.1f}" for i in (iu * 100).tolist()], + iu.mean().item() * 100, + ) + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + 
if i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str}") + + +def cat_list(images, fill_value=0): + max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) + batch_shape = (len(images),) + max_size + batched_imgs = images[0].new(*batch_shape).fill_(fill_value) + for img, pad_img in zip(images, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + return batched_imgs + + +def collate_fn(batch): + images, targets = list(zip(*batch)) + batched_imgs = cat_list(images, fill_value=0) + batched_targets = cat_list(targets, fill_value=255) + return batched_imgs, batched_targets + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + # elif "SLURM_PROCID" in os.environ: + # args.rank = int(os.environ["SLURM_PROCID"]) + # args.gpu = args.rank % torch.cuda.device_count() + elif hasattr(args, "rank"): + pass + else: + print("Not using distributed mode") + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +def reduce_across_processes(val): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. 
+        return torch.tensor(val)
+
+    t = torch.tensor(val, device="cuda")
+    dist.barrier()
+    dist.all_reduce(t)
+    return t
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1a8b53e02ba016a49e5c96f3ea5c70a87bb5c47
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py
@@ -0,0 +1,83 @@
+"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1."""
+import torch
+from torchvision import tv_tensors
+from torchvision.transforms import v2
+
+
+class PadIfSmaller(v2.Transform):
+    def __init__(self, size, fill=0):
+        super().__init__()
+        self.size = size
+        self.fill = v2._utils._setup_fill_arg(fill)
+
+    def _get_params(self, sample):
+        _, height, width = v2._utils.query_chw(sample)
+        padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)]
+        needs_padding = any(padding)
+        return dict(padding=padding, needs_padding=needs_padding)
+
+    def _transform(self, inpt, params):
+        if not params["needs_padding"]:
+            return inpt
+
+        fill = v2._utils._get_fill(self.fill, type(inpt))
+        fill = v2._utils._convert_fill_arg(fill)
+
+        return v2.functional.pad(inpt, padding=params["padding"], fill=fill)
+
+
+class CocoDetectionToVOCSegmentation(v2.Transform):
+    """Turn samples from datasets.CocoDetection into the same format as VOCSegmentation.
+
+    This is achieved in two steps:
+
+    1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately,
+       the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not
+       present in VOC are dropped and replaced by background.
+    2. COCO only offers detection masks, i.e. a (N, H, W) bool-ish tensor, where the truthy values in each individual
+       mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where
+       the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation
+       mask while pixels that belong to multiple detection masks are marked as invalid.
+    """
+
+    COCO_TO_VOC_LABEL_MAP = dict(
+        zip(
+            [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72],
+            range(21),
+        )
+    )
+    INVALID_VALUE = 255
+
+    def _coco_detection_masks_to_voc_segmentation_mask(self, target):
+        if "masks" not in target:
+            return None
+
+        instance_masks, instance_labels_coco = target["masks"], target["labels"]
+
+        valid_labels_voc = [
+            (idx, label_voc)
+            for idx, label_coco in enumerate(instance_labels_coco.tolist())
+            if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None
+        ]
+
+        if not valid_labels_voc:
+            return None
+
+        valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc)
+
+        instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8)
+        instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8)
+
+        # Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as
+        # there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step.
+        segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
+        segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE
+
+        return segmentation_mask
+
+    def forward(self, image, target):
+        segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target)
+        if segmentation_mask is None:
+            segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8)
+
+        return image, tv_tensors.Mask(segmentation_mask)
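A minimal invocation sketch for the three entry points added above (the argument names come from the parsers in perf.py and run.py; the paths are placeholders for wherever the traced model and the COCO 2017 data actually live):

    python3 export.py                                                                             # trace deeplabv3_resnet50 and save ./deeplabv3_resnet50.ts
    python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.ts --batch_size 4 --opt_level 0      # NPU throughput of the torch_aie-compiled model
    python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.ts                    # confusion matrix / mIoU on COCO val2017

perf.py can alternatively benchmark an OM model with --mode om --om_path <model.om>, which runs inference through ais_bench's InferSession instead of the torch_aie-compiled TorchScript module.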