diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1a6d9203c5b65aa0c11eee0d157033480f6ce71
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/export.py
@@ -0,0 +1,8 @@
+import torch
+
+model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
+model.eval()
+input_data = torch.ones(1, 3, 520, 520)
+ts_model = torch.jit.trace(model, input_data, strict=False)
+ts_model.save("./deeplabv3_resnet50.ts")
+print('finished saving model')
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..406d8787d2fb3f30f427624b0a1ea07f6a4fbac0
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/perf.py
@@ -0,0 +1,98 @@
+"""
+Switch to the Python environment with torch 2.0.1 before running.
+"""
+
+import argparse
+import time
+from tqdm import tqdm
+
+import torch
+import numpy as np
+
+import torch_aie
+from torch_aie import _enums
+from ais_bench.infer.interface import InferSession
+
+
+INPUT_WIDTH = 520
+INPUT_HEIGHT = 520
+
+
+def parse_args():
+    args = argparse.ArgumentParser(description="A program that operates in 'om' or 'ts' mode.")
+    args.add_argument("--mode", choices=["om", "ts"], required=True, help="Specify the mode ('om' or 'ts').")
+    args.add_argument('--om_path', help='Deeplabv3 om file path', type=str,
+        default='/onnx/mobilenetv1/mobilenet-v1_bs1.om'
+    )
+    args.add_argument('--ts_path', help='Deeplabv3 ts file path', type=str,
+        default='./deeplabv3_resnet50.ts'
+    )
+    args.add_argument("--batch_size", type=int, default=4, help="batch size.")
+    args.add_argument("--opt_level", type=int, default=0, help="opt level.")
+    return args.parse_args()
+
+if __name__ == '__main__':
+    infer_times = 100
+    om_cost = 0
+    pt_cost = 0
+    opts = parse_args()
+    OM_PATH = opts.om_path
+    TS_PATH = opts.ts_path
+    BATCH_SIZE = opts.batch_size
+    OPTS_LEVEL = opts.opt_level
+
+    if opts.mode == "om":
+        om_model = InferSession(0, OM_PATH)
+        for _ in tqdm(range(0, infer_times)):
+            dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32)
+            start = time.time()
+            output = om_model.infer([dummy_input], 'static', custom_sizes=90000000)  # revise static
+            cost = time.time() - start
+            om_cost += cost
+
+    if opts.mode == "ts":
+        ts_model = torch.jit.load(TS_PATH)
+
+        input_info = [torch_aie.Input((BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT))]
+
+        torch_aie.set_device(0)
+        print("start compile")
+        torchaie_model = torch_aie.compile(
+            ts_model,
+            inputs=input_info,
+            precision_policy=_enums.PrecisionPolicy.FP16,
+            soc_version='Ascend310P3',
+            optimization_level=OPTS_LEVEL
+        )
+        print("end compile")
+        torchaie_model.eval()
+
+        dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32)
+        input_tensor = torch.Tensor(dummy_input)
+        input_tensor = input_tensor.to("npu:0")
+        loops = 100
+        warm_ctr = 10
+
+        default_stream = torch_aie.npu.default_stream()
+        time_cost = 0
+
+        while warm_ctr:
+            _ = torchaie_model(input_tensor)
+            default_stream.synchronize()
+            warm_ctr -= 1
+
+        for i in range(loops):
+            t0 = time.time()
+            _ = torchaie_model(input_tensor)
+            default_stream.synchronize()
+            t1 = time.time()
+            time_cost += (t1 - t0)
+
+        print(f"fps: {loops} * {BATCH_SIZE} / {time_cost : .3f} samples/s")
+        print("torch_aie fps: ", loops * BATCH_SIZE / time_cost)
+
+        from datetime import datetime
+        current_time = datetime.now()
+        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+        print("Current Time:", formatted_time)
+
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py
new file mode 100644
index 0000000000000000000000000000000000000000..20c4ca36ca562156d87db88e9a0ea503db2b0520
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/presets.py
@@ -0,0 +1,110 @@
+import torch
+
+
+def get_modules(use_v2):
+    # We need a protected import to avoid the V2 warning in case just V1 is used
+    if use_v2:
+        import torchvision.transforms.v2
+        import torchvision.tv_tensors
+        import v2_extras
+
+        return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras
+    else:
+        import transforms
+
+        return transforms, None, None
+
+
+class SegmentationPresetTrain:
+    def __init__(
+        self,
+        *,
+        base_size,
+        crop_size,
+        hflip_prob=0.5,
+        mean=(0.485, 0.456, 0.406),
+        std=(0.229, 0.224, 0.225),
+        backend="pil",
+        use_v2=False,
+    ):
+        T, tv_tensors, v2_extras = get_modules(use_v2)
+
+        transforms = []
+        backend = backend.lower()
+        if backend == "tv_tensor":
+            transforms.append(T.ToImage())
+        elif backend == "tensor":
+            transforms.append(T.PILToTensor())
+        elif backend != "pil":
+            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
+
+        transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))]
+
+        if hflip_prob > 0:
+            transforms += [T.RandomHorizontalFlip(hflip_prob)]
+
+        if use_v2:
+            # We need a custom pad transform here, since the padding we want to perform here is fundamentally
+            # different from the padding in `RandomCrop` if `pad_if_needed=True`.
+            transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})]
+
+        transforms += [T.RandomCrop(crop_size)]
+
+        if backend == "pil":
+            transforms += [T.PILToTensor()]
+
+        if use_v2:
+            img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor
+            transforms += [
+                T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True)
+            ]
+        else:
+            # No need to explicitly convert masks as they're magically int64 already
+            transforms += [T.ToDtype(torch.float, scale=True)]
+
+        transforms += [T.Normalize(mean=mean, std=std)]
+        if use_v2:
+            transforms += [T.ToPureTensor()]
+
+        self.transforms = T.Compose(transforms)
+
+    def __call__(self, img, target):
+        return self.transforms(img, target)
+
+
+class SegmentationPresetEval:
+    def __init__(
+        self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False
+    ):
+        T, _, _ = get_modules(use_v2)
+
+        transforms = []
+        backend = backend.lower()
+        if backend == "tensor":
+            transforms += [T.PILToTensor()]
+        elif backend == "tv_tensor":
+            transforms += [T.ToImage()]
+        elif backend != "pil":
+            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")
+
+        if use_v2:
+            transforms += [T.Resize(size=(base_size, base_size))]
+        else:
+            # transforms += [T.RandomResize(min_size=base_size, max_size=base_size)]
+            transforms += [T.Resize(size=(base_size, base_size))]
+
+        if backend == "pil":
+            # Note: we could just convert to pure tensors even in v2?
+            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
+
+        transforms += [
+            T.ToDtype(torch.float, scale=True),
+            T.Normalize(mean=mean, std=std),
+        ]
+        if use_v2:
+            transforms += [T.ToPureTensor()]
+
+        self.transforms = T.Compose(transforms)
+
+    def __call__(self, img, target):
+        return self.transforms(img, target)
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4bf9e8b90b8e4f9abb9aa5f0c7187555bb24217
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/run.py
@@ -0,0 +1,206 @@
+import os
+import copy
+import warnings
+import argparse
+
+import torch
+import torchvision
+from PIL import Image
+from pycocotools import mask as coco_mask
+
+import utils
+import presets
+from transforms import Compose
+import torch_aie
+from torch_aie import _enums
+
+
+NUM_CLASSES = 21
+USE_V2 = False
+USE_NPU = True
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
+    masks = []
+    for polygons in segmentations:
+        rles = coco_mask.frPyObjects(polygons, height, width)
+        mask = coco_mask.decode(rles)
+        if len(mask.shape) < 3:
+            mask = mask[..., None]
+        mask = torch.as_tensor(mask, dtype=torch.uint8)
+        mask = mask.any(dim=2)
+        masks.append(mask)
+    if masks:
+        masks = torch.stack(masks, dim=0)
+    else:
+        masks = torch.zeros((0, height, width), dtype=torch.uint8)
+    return masks
+
+
+def get_transform(is_train, args):
+    if is_train:
+        return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=USE_V2)
+    else:
+        return presets.SegmentationPresetEval(base_size=520, backend="PIL", use_v2=USE_V2)
+
+
+def _coco_remove_images_without_annotations(dataset, cat_list=None):
+    def _has_valid_annotation(anno):
+        if len(anno) == 0:
+            return False
+        return sum(obj["area"] for obj in anno) > 1000
+
+    ids = []
+    for ds_idx, img_id in enumerate(dataset.ids):
+        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
+        anno = dataset.coco.loadAnns(ann_ids)
+        if cat_list:
+            anno = [obj for obj in anno if obj["category_id"] in cat_list]
+        if _has_valid_annotation(anno):
+            ids.append(ds_idx)
+
+    dataset = torch.utils.data.Subset(dataset, ids)
+    return dataset
+
+
+class FilterAndRemapCocoCategories:
+    def __init__(self, categories, remap=True):
+        self.categories = categories
+        self.remap = remap
+
+    def __call__(self, image, anno):
+        anno = [obj for obj in anno if obj["category_id"] in self.categories]
+        if not self.remap:
+            return image, anno
+        anno = copy.deepcopy(anno)
+        for obj in anno:
+            obj["category_id"] = self.categories.index(obj["category_id"])
+        return image, anno
+
+
+class ConvertCocoPolysToMask:
+    def __call__(self, image, anno):
+        w, h = image.size
+        segmentations = [obj["segmentation"] for obj in anno]
+        cats = [obj["category_id"] for obj in anno]
+        if segmentations:
+            masks = convert_coco_poly_to_mask(segmentations, h, w)
+            cats = torch.as_tensor(cats, dtype=masks.dtype)
+            target, _ = (masks * cats[:, None, None]).max(dim=0)
+            target[masks.sum(0) > 1] = 255
+        else:
+            target = torch.zeros((h, w), dtype=torch.uint8)
+        target = Image.fromarray(target.numpy())
+        return image, target
+
+
+def get_coco(root, image_set, transforms):
+    PATHS = {
+        "train": ("train2017", os.path.join("annotations", "instances_train2017.json")),
+        "val": ("val2017", os.path.join("annotations", "instances_val2017.json")),
+    }
+    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72]
+
+    img_folder, ann_file = PATHS[image_set]
+    img_folder = os.path.join(root, img_folder)
+    ann_file = os.path.join(root, ann_file)
+
+    if USE_V2:
+        import v2_extras
+        from torchvision.datasets import wrap_dataset_for_transforms_v2
+
+        transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms])
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+        dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"})
+    else:
+        transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms])
+        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
+
+    if image_set == "train":
+        dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST)
+
+    return dataset
+
+
+def evaluate(model, data_loader, device, num_classes):
+    model.eval()
+    confmat = utils.ConfusionMatrix(num_classes)
+    metric_logger = utils.MetricLogger(delimiter="  ")
+    header = "Test:"
+    num_processed_samples = 0
+    with torch.inference_mode():
+        ctr = 0
+        for image, target in metric_logger.log_every(data_loader, 100, header):
+            print(ctr)
+            ctr += 1
+            image, target = image.to(device), target.to(device)
+            output = model(image)
+
+            if USE_NPU:
+                output = output.to("cpu")
+                target = target.to("cpu")
+
+            confmat.update(target.flatten(), output.argmax(1).flatten())
+            num_processed_samples += image.shape[0]
+            print(confmat)
+
+        confmat.reduce_from_all_processes()
+
+    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
+    if (
+        hasattr(data_loader.dataset, "__len__")
+        and len(data_loader.dataset) != num_processed_samples
+        and torch.distributed.get_rank() == 0
+    ):
+        warnings.warn(
+            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
+            "samples were used for the validation, which might bias the results. "
+            "Try adjusting the batch size and / or the world size. "
+            "Setting the world size to 1 is always a safe bet."
+        )
+
+    return confmat
+
+
+def parse_args():
+    args = argparse.ArgumentParser(description="DeeplabV3 accuracy evaluation on the COCO 2017 validation set.")
+    args.add_argument('--dataset_path', help='coco2017 dataset path', type=str,
+        default='./coco2017'
+    )  # "/home/ascend/coco2017"
+    args.add_argument('--ts_path', help='DeeplabV3 ts path', type=str,
+        default='./deeplabv3_resnet50.ts'
+    )
+    return args.parse_args()
+
+
+def main(dataset_path, ts_path):
+    dataset = get_coco(root=dataset_path, image_set="val", transforms=get_transform(is_train=False, args=None))
+    model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
+
+    test_sampler = torch.utils.data.SequentialSampler(dataset)
+    data_loader_test = torch.utils.data.DataLoader(
+        dataset, batch_size=1, sampler=test_sampler, collate_fn=utils.collate_fn
+    )
+    device = "cpu"
+    if USE_NPU:
+        device = "npu:0"
+    ts_model = torch.jit.load(ts_path)
+    input_info = [torch_aie.Input((1, 3, 520, 520))]
+    torch_aie.set_device(0)
+    print("start compile")
+    model = torch_aie.compile(
+        ts_model,
+        inputs=input_info,
+        precision_policy=_enums.PrecisionPolicy.FP16,
+        soc_version='Ascend310P3'
+    )
+    print("end compile")
+    confmat = evaluate(model, data_loader_test, device=device, num_classes=NUM_CLASSES)
+    print(confmat)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    dataset_path = args.dataset_path
+    ts_path = args.ts_path
+    main(dataset_path, ts_path)
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce0c59581acf328aaf35ff94bb3ff4e699eb650c
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/transforms.py
@@ -0,0 +1,112 @@
+import random
+
+import numpy as np
+import torch
+from torchvision import transforms as T
+from torchvision.transforms import functional as F
+
+
+def pad_if_smaller(img, size, fill=0):
+    min_size = min(img.size)
+    if min_size < size:
+        ow, oh = img.size
+        padh = size - oh if oh < size else 0
+        padw = size - ow if ow < size else 0
+        img = F.pad(img, (0, 0, padw, padh), fill=fill)
+    return img
+
+
+class Compose:
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, image, target):
+        for t in self.transforms:
+            image, target = t(image, target)
+        return image, target
+
+
+class RandomResize:
+    def __init__(self, min_size, max_size=None):
+        self.min_size = min_size
+        if max_size is None:
+            max_size = min_size
+        self.max_size = max_size
+
+    def __call__(self, image, target):
+        size = random.randint(self.min_size, self.max_size)
+        image = F.resize(image, size, antialias=True)
+        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)
+        return image, target
+
+class Resize:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = F.resize(image, self.size, antialias=True)
+        target = F.resize(target, self.size, interpolation=T.InterpolationMode.NEAREST)
+        return image, target
+
+
+class RandomHorizontalFlip:
+    def __init__(self, flip_prob):
+        self.flip_prob = flip_prob
+
+    def __call__(self, image, target):
+        if random.random() < self.flip_prob:
+            image = F.hflip(image)
+            target = F.hflip(target)
+        return image, target
+
+
+class RandomCrop:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = pad_if_smaller(image, self.size)
+        target = pad_if_smaller(target, self.size, fill=255)
+        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))
+        image = F.crop(image, *crop_params)
+        target = F.crop(target, *crop_params)
+        return image, target
+
+
+class CenterCrop:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, image, target):
+        image = F.center_crop(image, self.size)
+        target = F.center_crop(target, self.size)
+        return image, target
+
+
+class PILToTensor:
+    def __call__(self, image, target):
+        image = F.pil_to_tensor(image)
+        target = torch.as_tensor(np.array(target), dtype=torch.int64)
+        return image, target
+
+
+class ToDtype:
+    def __init__(self, dtype, scale=False):
+        self.dtype = dtype
+        self.scale = scale
+
+    def __call__(self, image, target):
+        if not self.scale:
+            return image.to(dtype=self.dtype), target
+        image = F.convert_image_dtype(image, self.dtype)
+        return image, target
+
+
+class Normalize:
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, image, target):
+        image = F.normalize(image, mean=self.mean, std=self.std)
+        return image, target
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..92db18998511d7cd560de74cd8a60966d2960899
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/utils.py
@@ -0,0 +1,300 @@
+import datetime
+import errno
+import os
+import time
+from collections import defaultdict, deque
+
+import torch
+import torch.distributed as dist
+
+
+class SmoothedValue:
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+
+    def __init__(self, window_size=20, fmt=None):
+        if fmt is None:
+            fmt = "{median:.4f} ({global_avg:.4f})"
+        self.deque = deque(maxlen=window_size)
+        self.total = 0.0
+        self.count = 0
+        self.fmt = fmt
+
+    def update(self, value, n=1):
+        self.deque.append(value)
+        self.count += n
+        self.total += value * n
+
+    def synchronize_between_processes(self):
+        """
+        Warning: does not synchronize the deque!
+ """ + t = reduce_across_processes([self.count, self.total]) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +class ConfusionMatrix: + def __init__(self, num_classes): + self.num_classes = num_classes + self.mat = None + + def update(self, a, b): + n = self.num_classes + if self.mat is None: + self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device) + with torch.inference_mode(): + k = (a >= 0) & (a < n) + inds = n * a[k].to(torch.int64) + b[k] + self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) + + def reset(self): + self.mat.zero_() + + def compute(self): + h = self.mat.float() + acc_global = torch.diag(h).sum() / h.sum() + acc = torch.diag(h) / h.sum(1) + iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) + return acc_global, acc, iu + + def reduce_from_all_processes(self): + self.mat = reduce_across_processes(self.mat).to(torch.int64) + + def __str__(self): + acc_global, acc, iu = self.compute() + return ("global correct: {:.1f}\naverage row correct: {}\nIoU: {}\nmean IoU: {:.1f}").format( + acc_global.item() * 100, + [f"{i:.1f}" for i in (acc * 100).tolist()], + [f"{i:.1f}" for i in (iu * 100).tolist()], + iu.mean().item() * 100, + ) + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + if not isinstance(v, (float, int)): + raise TypeError( + f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + ) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + 
if i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str}") + + +def cat_list(images, fill_value=0): + max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) + batch_shape = (len(images),) + max_size + batched_imgs = images[0].new(*batch_shape).fill_(fill_value) + for img, pad_img in zip(images, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + return batched_imgs + + +def collate_fn(batch): + images, targets = list(zip(*batch)) + batched_imgs = cat_list(images, fill_value=0) + batched_targets = cat_list(targets, fill_value=255) + return batched_imgs, batched_targets + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + # elif "SLURM_PROCID" in os.environ: + # args.rank = int(os.environ["SLURM_PROCID"]) + # args.gpu = args.rank % torch.cuda.device_count() + elif hasattr(args, "rank"): + pass + else: + print("Not using distributed mode") + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = "nccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank + ) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +def reduce_across_processes(val): + if not is_dist_avail_and_initialized(): + # nothing to sync, but we still convert to tensor for consistency with the distributed case. 
+        return torch.tensor(val)
+
+    t = torch.tensor(val, device="cuda")
+    dist.barrier()
+    dist.all_reduce(t)
+    return t
diff --git a/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1a8b53e02ba016a49e5c96f3ea5c70a87bb5c47
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/segmentation/Deeplabv3/v2_extras.py
@@ -0,0 +1,83 @@
+"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1."""
+import torch
+from torchvision import tv_tensors
+from torchvision.transforms import v2
+
+
+class PadIfSmaller(v2.Transform):
+    def __init__(self, size, fill=0):
+        super().__init__()
+        self.size = size
+        self.fill = v2._utils._setup_fill_arg(fill)
+
+    def _get_params(self, sample):
+        _, height, width = v2._utils.query_chw(sample)
+        padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)]
+        needs_padding = any(padding)
+        return dict(padding=padding, needs_padding=needs_padding)
+
+    def _transform(self, inpt, params):
+        if not params["needs_padding"]:
+            return inpt
+
+        fill = v2._utils._get_fill(self.fill, type(inpt))
+        fill = v2._utils._convert_fill_arg(fill)
+
+        return v2.functional.pad(inpt, padding=params["padding"], fill=fill)
+
+
+class CocoDetectionToVOCSegmentation(v2.Transform):
+    """Turn samples from datasets.CocoDetection into the same format as VOCSegmentation.
+
+    This is achieved in two steps:
+
+    1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately,
+       the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not
+       present in VOC are dropped and replaced by background.
+    2. COCO only offers detection masks, i.e. a (N, H, W) bool-ish tensor, where the truthy values in each individual
+       mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where
+       the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation
+       mask while pixels that belong to multiple detection masks are marked as invalid.
+    """
+
+    COCO_TO_VOC_LABEL_MAP = dict(
+        zip(
+            [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72],
+            range(21),
+        )
+    )
+    INVALID_VALUE = 255
+
+    def _coco_detection_masks_to_voc_segmentation_mask(self, target):
+        if "masks" not in target:
+            return None
+
+        instance_masks, instance_labels_coco = target["masks"], target["labels"]
+
+        valid_labels_voc = [
+            (idx, label_voc)
+            for idx, label_coco in enumerate(instance_labels_coco.tolist())
+            if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None
+        ]
+
+        if not valid_labels_voc:
+            return None
+
+        valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc)
+
+        instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8)
+        instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8)
+
+        # Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as
+        # there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step.
+        segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
+        segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE
+
+        return segmentation_mask
+
+    def forward(self, image, target):
+        segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target)
+        if segmentation_mask is None:
+            segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8)
+
+        return image, tv_tensors.Mask(segmentation_mask)
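A minimal invocation sketch for the three entry points added above (the argument names come from the parsers in perf.py and run.py; the paths are placeholders for wherever the traced model and the COCO 2017 data actually live):

    python3 export.py                                                                             # trace deeplabv3_resnet50 and save ./deeplabv3_resnet50.ts
    python3 perf.py --mode ts --ts_path ./deeplabv3_resnet50.ts --batch_size 4 --opt_level 0      # NPU throughput of the torch_aie-compiled model
    python3 run.py --dataset_path ./coco2017 --ts_path ./deeplabv3_resnet50.ts                    # confusion matrix / mIoU on COCO val2017

perf.py can alternatively benchmark an OM model with --mode om --om_path <model.om>, which runs inference through ais_bench's InferSession instead of the torch_aie-compiled TorchScript module.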