From c9e409f69c785a91e7b2e411ac2d77e6ddecfe42 Mon Sep 17 00:00:00 2001 From: celianguai <985261217@qq.com> Date: Wed, 14 Sep 2022 20:17:24 +0800 Subject: [PATCH 1/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/ECANet/.gitignore | 2 + .../contrib/cv/classification/ECANet/LICENSE | 52 ++ .../cv/classification/ECANet/README.md | 187 +++++ .../cv/classification/ECANet/light_main.py | 434 ++++++++++++ .../contrib/cv/classification/ECANet/main.py | 662 ++++++++++++++++++ .../classification/ECANet/models/__init__.py | 12 + .../ECANet/models/eca_mobilenetv2.py | 139 ++++ .../ECANet/models/eca_module.py | 39 ++ .../cv/classification/ECANet/models/eca_ns.py | 32 + .../ECANet/models/eca_resnet.py | 220 ++++++ .../classification/ECANet/modelzoo_level.txt | 3 + .../cv/classification/ECANet/paras_flops.py | 65 ++ .../cv/classification/ECANet/requirements.txt | 3 + .../cv/classification/ECANet/test/env_npu.sh | 76 ++ .../ECANet/test/train_eval_8p.sh | 123 ++++ .../ECANet/test/train_finetune_1p.sh | 152 ++++ .../ECANet/test/train_full_1p.sh | 127 ++++ .../ECANet/test/train_full_8p.sh | 127 ++++ .../ECANet/test/train_performance_1p.sh | 127 ++++ .../ECANet/test/train_performance_8p.sh | 130 ++++ 20 files changed, 2712 insertions(+) create mode 100644 PyTorch/contrib/cv/classification/ECANet/.gitignore create mode 100644 PyTorch/contrib/cv/classification/ECANet/LICENSE create mode 100644 PyTorch/contrib/cv/classification/ECANet/README.md create mode 100644 PyTorch/contrib/cv/classification/ECANet/light_main.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/main.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/__init__.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_module.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt create mode 100644 PyTorch/contrib/cv/classification/ECANet/paras_flops.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/requirements.txt create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh diff --git a/PyTorch/contrib/cv/classification/ECANet/.gitignore b/PyTorch/contrib/cv/classification/ECANet/.gitignore new file mode 100644 index 0000000000..f53d18e061 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +.vscode diff --git a/PyTorch/contrib/cv/classification/ECANet/LICENSE b/PyTorch/contrib/cv/classification/ECANet/LICENSE new file mode 100644 index 0000000000..7970c97fb4 --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/ECANet/LICENSE @@ -0,0 +1,52 @@ +MIT License + +Copyright (c) 2019 BangguWu, Qilong Wang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +BSD 3-Clause License + +Copyright (c) 2020, BangguWu +All rights reserved. +Copyright 2022 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/PyTorch/contrib/cv/classification/ECANet/README.md b/PyTorch/contrib/cv/classification/ECANet/README.md
new file mode 100644
index 0000000000..047fb2b0c7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ECANet/README.md
@@ -0,0 +1,187 @@
+# ECANet for PyTorch
+
+- [Overview](概述.md)
+- [Preparing the Training Environment](准备训练环境.md)
+- [Starting Training](开始训练.md)
+- [Training Results](训练结果展示.md)
+- [Release Notes](版本说明.md)
+
+
+
+# Overview
+
+## Summary
+
+ECANet is an efficient image classification network. It adds the Efficient Channel Attention (ECA) module on top of a residual network, avoiding the degradation of channel attention caused by dimensionality reduction. ECANet is built on ResNet-50 with ECABottleneck blocks; the module introduces only a handful of extra parameters while bringing a clear accuracy gain.
+
+- Reference implementation:
+
+  ```
+  url=https://github.com/BangguWu/ECANet.git
+  ```
+
+- Implementation adapted for Ascend AI processors:
+
+  ```
+  url=https://gitee.com/ascend/ModelZoo-PyTorch.git
+  code_path=PyTorch/contrib/cv/classification
+  ```
+
+- To obtain the code via Git:
+
+  ```
+  git clone https://gitee.com/celianguai/ModelZoo-PyTorch.git       # clone the repository
+  cd ./ModelZoo-PyTorch/PyTorch/contrib/cv/classification/ECANet    # switch to the model directory; not needed if the repository only contains this model
+  ```
+
+- Alternatively, click "Download Now" to download the source package.
+
+# Preparing the Training Environment
+
+## Preparing the Environment
+
+- The firmware/driver, CANN, and PyTorch versions supported by this model are listed below.
+
+  **Table 1** Version compatibility
+
+  | Component         | Version                                                      |
+  | ----------------- | ------------------------------------------------------------ |
+  | Firmware & driver | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+  | CANN              | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) |
+  | PyTorch           | [1.5.0](https://gitee.com/ascend/pytorch/tree/v1.5.0/)       |
+
+- Environment setup guide.
+
+  See [Preparing a PyTorch Training Environment](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes).
+
+- Install the dependencies (add packages as required by the model).
+
+  ```
+  pip install -r requirements.txt
+  ```
+
+
+## Preparing the Dataset
+
+1. Obtain the dataset.
+
+   Prepare the dataset yourself; ImageNet-1K is a suitable choice. It must contain a train part and a val part, organized as shown below (a short usage sketch follows this list).
+
+   ```
+   ├── ImageNet
+        ├──train
+             ├──class1
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──class2
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──...
+        ├──val
+             ├──class1
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──class2
+                  │──image1
+                  │──image2
+                  │   ...
+   ```
+
+2. Data preprocessing (process the dataset as needed).
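+
+   The training scripts read this layout with `torchvision.datasets.ImageFolder`, which maps each class sub-folder to a label index. A minimal sketch of how the layout is consumed (the `/path/to/ImageNet` path is only a placeholder):
+
+   ```
+   import torchvision.datasets as datasets
+   import torchvision.transforms as transforms
+
+   # every sub-folder of train/ (or val/) becomes one class
+   train_dataset = datasets.ImageFolder(
+       "/path/to/ImageNet/train",
+       transforms.Compose([
+           transforms.RandomResizedCrop(224),
+           transforms.RandomHorizontalFlip(),
+           transforms.ToTensor(),
+       ]))
+   print(len(train_dataset.classes))   # number of classes found, e.g. 1000
+   ```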
+
+
+# Start Training
+
+## Training the Model
+
+1. Go to the root directory of the extracted source package.
+
+   ```
+   cd ./ECANet
+   ```
+
+2. Run the training scripts.
+
+   The model supports single-device (1p) and single-node 8-device (8p) training.
+
+   - Single-device training
+
+     Start 1p training.
+
+     ```
+     # training 1p accuracy
+     bash ./test/train_full_1p.sh --data_path=real_data_path
+
+     # training 1p performance
+     bash ./test/train_performance_1p.sh --data_path=real_data_path
+
+     # finetuning 1p
+     bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
+     ```
+
+   - Single-node 8-device training
+
+     Start 8p training.
+
+     ```
+     # training 8p accuracy
+     bash ./test/train_full_8p.sh --data_path=real_data_path
+
+     # training 8p performance
+     bash ./test/train_performance_8p.sh --data_path=real_data_path
+
+     # test 8p accuracy
+     bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
+     ```
+
+   --data_path takes the dataset path; --pth_path takes the pretrained model path.
+
+   The training script parameters are described below.
+
+   ```
+   Common parameters:
+   --arch                              //model architecture, default: eca_resnet50
+   --ksize                             //ECA kernel sizes, default: 3557
+   --workers                           //number of data-loading workers
+   --data                              //dataset path
+   --device                            //training device type
+   --gpu                               //device id to use
+   --epochs                            //number of training epochs
+   --batch-size                        //training batch size
+   --lr                                //initial learning rate, default: 0.1
+   --resume                            //checkpoint path for resuming an interrupted run
+   --pretrained                        //pretrained model path
+   --amp                               //whether to use mixed precision
+
+   Multi-device parameters:
+   --multiprocessing-distributed       //whether to use multi-device training
+   --device-list '0,1,2,3,4,5,6,7'     //devices to use for multi-device training
+   ```
+
+   After training finishes, the checkpoint files are saved in the current directory, and the accuracy and performance of the run are printed.
+
+# Training Results
+
+**Table 2** Training results
+
+| NAME          | Acc@1 |  FPS | Epochs | AMP_Type |
+| ------------- | ----- | ---: | ------ | -------: |
+| 1p-competitor | -     |  610 | 1      |        - |
+| 1p-NPU        | -     |  911 | 1      |       O2 |
+| 8p-competitor | 77.91 | 4200 | 100    |        - |
+| 8p-NPU        | 78.30 | 6450 | 100    |       O2 |
+
+# Release Notes
+
+## Changes
+
+
+2022.09.14: initial release.
+
+## Known Issues
+
+
+None.
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/ECANet/light_main.py b/PyTorch/contrib/cv/classification/ECANet/light_main.py
new file mode 100644
index 0000000000..13df143e74
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ECANet/light_main.py
@@ -0,0 +1,434 @@
+# Copyright (c) 2020, Banggu Wu
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
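+#
+# light_main.py is a lightweight training entry kept close to the reference
+# implementation: plain SGD training of a model from models/ on an
+# ImageFolder-style dataset, with params/FLOPs reported via thop. The
+# NPU-adapted entry with apex AMP and distributed support is main.py.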
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import argparse +import os +import random +import shutil +import time +import warnings + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import models +from thop import profile + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=400, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=96, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--lr', '--learning-rate', default=0.045, type=float, + metavar='LR', help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--weight-decay', '--wd', default=4e-5, type=float, + metavar='W', help='weight decay (default: 1e-4)') +parser.add_argument('--print-freq', '-p', default=500, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='gloo', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--action', default='', type=str, + help='other information.') + + +best_prec1 = 0 + + +def main(): + global args, best_prec1 + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + args.distributed = args.world_size > 1 + + if args.distributed: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size) + + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + + if args.gpu is not None: + model = model.cuda(args.gpu) + elif args.distributed: + model.cuda() + model = torch.nn.parallel.DistributedDataParallel(model) + + else: + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + print(model) + + # get the number of models parameters + print('Number of models parameters: {}'.format( + sum([p.data.nelement() for p in model.parameters()]))) + with torch.cuda.device(0): + net = models.__dict__[args.arch]() + # flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True, print_per_layer_stat=True) + flops, params = profile(net, inputs=(torch.randn(1,3,224,224),)) + print('Flops: ' + str(flops)) + print('Params: ' + str(params)) + del net, flops, params + + # define loss function (criterion) and optimizer + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume) + args.start_epoch = checkpoint['epoch'] + best_prec1 = checkpoint['best_prec1'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + del checkpoint + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + if args.evaluate: + m = time.time() + _, _ =validate(val_loader, model, criterion) + n = time.time() + print((n-m)/3600) + return + + directory = "runs/%s/"%(args.arch + '_' + args.action) + if not os.path.exists(directory): + os.makedirs(directory) + + Loss_plot = {} + train_prec1_plot = {} + train_prec5_plot = {} + val_prec1_plot = {} + val_prec5_plot = {} + + for epoch 
in range(args.start_epoch, args.epochs): + start_time = time.time() + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(optimizer, epoch) + + # train for one epoch + # train(train_loader, model, criterion, optimizer, epoch) + loss_temp, train_prec1_temp, train_prec5_temp = train(train_loader, model, criterion, optimizer, epoch) + Loss_plot[epoch] = loss_temp + train_prec1_plot[epoch] = train_prec1_temp + train_prec5_plot[epoch] = train_prec5_temp + + # evaluate on validation set + # prec1 = validate(val_loader, model, criterion) + prec1, prec5 = validate(val_loader, model, criterion) + val_prec1_plot[epoch] = prec1 + val_prec5_plot[epoch] = prec5 + + # remember best prec@1 and save checkpoint + is_best = prec1 > best_prec1 + best_prec1 = max(prec1, best_prec1) + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + }, is_best) + + # 将Loss,train_prec1,train_prec5,val_prec1,val_prec5用.txt的文件存起来 + data_save(directory + 'Loss_plot.txt', Loss_plot) + data_save(directory + 'train_prec1.txt', train_prec1_plot) + data_save(directory + 'train_prec5.txt', train_prec5_plot) + data_save(directory + 'val_prec1.txt', val_prec1_plot) + data_save(directory + 'val_prec5.txt', val_prec5_plot) + + end_time = time.time() + time_value = (end_time - start_time) / 3600 + print("-" * 80) + print(time_value) + print("-" * 80) + + +def train(train_loader, model, criterion, optimizer, epoch): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + losses_batch = {} + # switch to train mode + model.train() + + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Epoch: [{0}][{1}/{2}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + data_time=data_time, loss=losses, top1=top1, top5=top5)) + losses_batch[epoch] = losses.avg + loss_batch = open("loss_batch.txt", 'a') + for line in losses_batch: + loss_batch.write(str(epoch) + " " + str(line) + " " + str(losses_batch[line]) + '\n') + loss_batch.close() + + return losses.avg, top1.avg, top5.avg + + +def validate(val_loader, model, criterion): + batch_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if args.gpu is not None: + input = input.cuda(args.gpu, non_blocking=True) + target = 
target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}/{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, len(val_loader), batch_time=batch_time, loss=losses, + top1=top1, top5=top5)) + + print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg, top5.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + directory = "runs/%s/"%(args.arch + '_' + args.action) + + filename = directory + filename + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, directory + 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def adjust_learning_rate(optimizer, epoch): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + # lr = args.lr * (0.1 ** (epoch // 30)) + lr = args.lr * (0.98 ** epoch) + print('lr = ', lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def data_save(root, file): + if not os.path.exists(root): + os.mknod(root) + file_temp = open(root, 'r') + lines = file_temp.readlines() + if not lines: + epoch = -1 + else: + epoch = lines[-1][:lines[-1].index(' ')] + epoch = int(epoch) + file_temp.close() + file_temp = open(root, 'a') + for line in file: + if line > epoch: + file_temp.write(str(line) + " " + str(file[line]) + '\n') + file_temp.close() + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/main.py b/PyTorch/contrib/cv/classification/ECANet/main.py new file mode 100644 index 0000000000..c8a3c99c57 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/main.py @@ -0,0 +1,662 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
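+#
+# main.py is the training entry adapted for Ascend NPU: it supports 1p and
+# multi-process distributed training (--multiprocessing_distributed /
+# --device_list), mixed precision through apex AMP (--amp), and profiling
+# (--prof).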
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import argparse +import os +from os.path import exists, join, split +import random +import shutil +import time +import warnings +import json +import logging +import apex +from apex import amp +import threading +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import models + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data', metavar='DIR', + help='path to dataset') +parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=100, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)') +parser.add_argument('--print-freq', '-p', default=50, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pth_path', default='', type=str, metavar='PATH', + help='path to pretrained checkpoint (default: none)') +parser.add_argument('--world_size', default=1, type=int, + help='number of distributed processes') +parser.add_argument('--rank',default=0,type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://127.0.0.1:9278', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='hccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, nargs='+', + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing_distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--num_classes', default=1000, type=int, + help='The number of classes.') +parser.add_argument('--ksize', default=None, type=list, + help='Manually select the eca module kernel size') +parser.add_argument('--action', default='', type=str, + help='other information.') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--profling', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--device', default='gpu', type=str, help='npu or gpu') +parser.add_argument('--addr', default='127.0.0.1', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--warm_up_epochs', default=5, type=int, + help='warm up') + + +best_prec1=0 + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + global args + args = parser.parse_args() + print(args.device_list) + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29998' + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + + if args.multiprocessing_distributed: + args.world_size = ngpus_per_node * args.world_size + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + main_worker(args.gpu, ngpus_per_node, args) + +def main_worker(gpu, ngpus_per_node, args): + + global best_prec1 + args.gpu = args.process_device_map[gpu] + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + args.rank = args.rank * ngpus_per_node + gpu + + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](k_size=args.ksize, pretrained=True) + print("loading model of yours...") + if args.pth_path: + print("load pth you give") + pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] + else: + pretrained_dict = torch.load("./model_best.pth.tar", map_location="cpu")["state_dict"] + + if "fc.weight" in pretrained_dict: + print("pop fc layer weight") + pretrained_dict.pop('fc.weight') + pretrained_dict.pop('fc.bias') + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model '{}'".format(args.arch)) + if args.ksize == None: + model = models.__dict__[args.arch]() + else: + model = models.__dict__[args.arch](k_size=args.ksize) + + if args.distributed: + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + model.cuda() + + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + 
optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + + if args.distributed: + if args.gpu is not None: + + if args.pretrained: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, + find_unused_parameters=True) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + else: + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + else: + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = torch.nn.DataParallel(model).to(loc) + else: + model = torch.nn.DataParallel(model).cuda() + + print(model) + + # get the number of models parameters + print('Number of models parameters: {}'.format( + sum([p.data.nelement() for p in model.parameters()]))) + + # define loss function (criterion) and optimizer + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + criterion = nn.CrossEntropyLoss().to(loc) + else: + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_prec1 = checkpoint['best_prec1'] + if args.gpu is not None: + best_prec1 = best_prec1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, 
pin_memory=True) + + if args.evaluate: + m = time.time() + _, _ =validate(val_loader, model, criterion) + n = time.time() + print((n-m)/3600) + return + + if args.prof: + profiling(train_loader, model, criterion, optimizer, args) + return + + if args.profling: + cann_profiling(train_loader, model, criterion, optimizer, args) + return + + FPS = 0 + print("===> Training") + for epoch in range(args.start_epoch, args.epochs): + start_time = time.time() + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + epoch_FPS = train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) + + # evaluate on validation set + prec1 = validate(val_loader, model, criterion, args, ngpus_per_node) + + # remember best prec@1 and save checkpoint + is_best = prec1 > best_prec1 + best_prec1 = max(prec1, best_prec1) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + }, is_best) + +#print profile +def profiling(train_loader, model, criterion, optimizer, args): + # switch to train mode + model.train() + + def update(model, input, target, optimizer): + output = model(input) + loss = criterion(output, target) + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + for step, (input, target) in enumerate(train_loader): + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + if step < 5: + update(model, input, target, optimizer) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update(model, input, target, optimizer) + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update(model, input, target, optimizer) + break + + prof.export_chrome_trace("output.prof") + +def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':6.4f') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + # switch to train mode + model.train() + + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record 
loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + if args.device == 'npu': + torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if batch_time.avg: + print("[gpu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + + epoch_FPS = args.batch_size * args.world_size / batch_time.avg + print(f"train_one_epoch FPS: {epoch_FPS}") + return epoch_FPS + + +def validate(val_loader, model, criterion, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':6.4f') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc).to(torch.float) + else: + input = input.cuda(args.gpu, non_blocking=True) + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > 
(self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/models/__init__.py b/PyTorch/contrib/cv/classification/ECANet/models/__init__.py new file mode 100644 index 0000000000..52bcde7185 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +from .eca_resnet import * +from .eca_mobilenetv2 import * \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py new file mode 100644 index 0000000000..01a4dd8d63 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py @@ -0,0 +1,139 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
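+#
+# MobileNetV2 backbone with an eca_layer appended to every inverted residual
+# block; typical construction is eca_mobilenet_v2(num_classes=1000).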
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +from torch import nn +from .eca_module import eca_layer + +__all__ = ['ECA_MobileNetV2', 'eca_mobilenet_v2'] + + +model_urls = { + 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', +} + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio, k_size): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + layers.append(eca_layer(oup, k_size)) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class ECA_MobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.0): + super(ECA_MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = int(input_channel * width_mult) + self.last_channel = int(last_channel * max(1.0, width_mult)) + features = [ConvBNReLU(3, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if c < 96: + ksize = 1 + else: + ksize = 3 + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t, k_size=ksize)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(0.25), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = x.mean(-1).mean(-1) + x = self.classifier(x) + return x + + +def eca_mobilenet_v2(pretrained=False, progress=True, **kwargs): + """ + Constructs a ECA_MobileNetV2 architecture from + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model 
= ECA_MobileNetV2(**kwargs) + # if pretrained: + # state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], + # progress=progress) + # model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py new file mode 100644 index 0000000000..0312550059 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +from torch import nn +from torch.nn.parameter import Parameter + +class eca_layer(nn.Module): + """Constructs a ECA module. + + Args: + channel: Number of channels of the input feature map + k_size: Adaptive selection of kernel size + """ + def __init__(self, channel, k_size=3): + super(eca_layer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # feature descriptor on the global spatial information + y = self.avg_pool(x) + + # Two different branches of ECA module + y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) + + # Multi-scale information fusion + y = self.sigmoid(y) + + return x * y.expand_as(x) + diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py new file mode 100644 index 0000000000..aefd239d56 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +import time +from torch import nn + + +class eca_layer(nn.Module): + def __init__(self, channel, k_size): + super(eca_layer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.k_size = k_size + self.conv = nn.Conv1d(channel, channel, kernel_size=k_size, bias=False, groups=channel) + self.sigmoid = nn.Sigmoid() + + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x) + y = nn.functional.unfold(y.transpose(-1, -3), kernel_size=(1, self.k_size), padding=(0, (self.k_size - 1) // 2)) + y = self.conv(y.transpose(-1, -2)).unsqueeze(-1) + y = self.sigmoid(y) + x = x * y.expand_as(x) + return x diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py new file mode 100644 index 0000000000..e5ee62b459 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py @@ -0,0 +1,220 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
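+#
+# ResNet-18/34/50/101/152 variants built from ECABasicBlock / ECABottleneck,
+# each of which applies an eca_layer to the residual branch before the skip
+# connection; k_size picks the ECA kernel size per stage, e.g.
+#     model = eca_resnet50(k_size=[3, 5, 5, 7], num_classes=1000)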
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch.nn as nn +import math +# import torch.utils.model_zoo as model_zoo +from .eca_module import eca_layer + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class ECABasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3): + super(ECABasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, 1) + self.bn2 = nn.BatchNorm2d(planes) + self.eca = eca_layer(planes, k_size) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.eca(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ECABottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3): + super(ECABottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.eca = eca_layer(planes * 4, k_size) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + out = self.eca(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, k_size=[3, 3, 3, 3]): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0], int(k_size[0])) + self.layer2 = self._make_layer(block, 128, layers[1], int(k_size[1]), stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], int(k_size[2]), stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], int(k_size[3]), stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, k_size, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, k_size)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, k_size=k_size)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def eca_resnet18(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-18 model. + + Args: + k_size: Adaptive selection of kernel size + pretrained (bool): If True, returns a model pre-trained on ImageNet + num_classes:The classes of classification + """ + model = ResNet(ECABasicBlock, [2, 2, 2, 2], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet34(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-34 model. + + Args: + k_size: Adaptive selection of kernel size + pretrained (bool): If True, returns a model pre-trained on ImageNet + num_classes:The classes of classification + """ + model = ResNet(ECABasicBlock, [3, 4, 6, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet50(k_size=[3, 3, 3, 3], num_classes=1000, pretrained=False): + """Constructs a ResNet-50 model. + + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + print("Constructing eca_resnet50......") + model = ResNet(ECABottleneck, [3, 4, 6, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet101(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-101 model. + + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(ECABottleneck, [3, 4, 23, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet152(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-152 model. 
+ + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(ECABottleneck, [3, 8, 36, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model diff --git a/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt new file mode 100644 index 0000000000..0b49b4fb26 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/paras_flops.py b/PyTorch/contrib/cv/classification/ECANet/paras_flops.py new file mode 100644 index 0000000000..9665a6a4a0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/paras_flops.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +# import test_models as models +from thop import profile +import torchvision +import models +import argparse + + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--arch', '-a', metavar='ARCH', default='eca_resnet50', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: eca_resnet50)') + +def main(): + global args + args = parser.parse_args() + model = models.__dict__[args.arch]() + print(model) + input = torch.randn(1, 3, 224, 224) + model.train() + # model.eval() + flops, params = profile(model, inputs=(input, )) + print("flops = ", flops) + print("params = ", params) + flops, params = clever_format([flops, params], "%.3f") + print("flops = ", flops) + print("params = ", params) + +def clever_format(nums, format="%.2f"): + clever_nums = [] + + for num in nums: + if num > 1e12: + clever_nums.append(format % (num / 1024 ** 4) + "T") + elif num > 1e9: + clever_nums.append(format % (num / 1024 ** 3) + "G") + elif num > 1e6: + clever_nums.append(format % (num / 1024 ** 2) + "M") + elif num > 1e3: + clever_nums.append(format % (num / 1024) + "K") + else: + clever_nums.append(format % num + "B") + + clever_nums = clever_nums[0] if len(clever_nums) == 1 else (*clever_nums, ) + + return clever_nums + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/requirements.txt b/PyTorch/contrib/cv/classification/ECANet/requirements.txt new file mode 100644 index 0000000000..5d97f76367 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/requirements.txt @@ -0,0 +1,3 @@ +numpy +apex +tqdm diff --git a/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh b/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh new file mode 100644 index 0000000000..4740fafdcc --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh @@ -0,0 +1,76 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export 
LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error 
+export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh new file mode 100644 index 0000000000..4c46f099ce --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size resume RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` +# checkpoint文件路径,以实际路径为准 +pth_path="" + + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --resume ${pth_path} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`grep Test ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log | awk '{print $8}' | awk 'END {print}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh new file mode 100644 index 0000000000..96ff1ee826 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh @@ -0,0 +1,152 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +# 数据集路径,保持为空,不需要修改 +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` +# checkpoint文件路径,以实际路径为准 +pth_path="" + +device_id=0 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ 
$data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --workers 16 \ + --lr 0.1 \ + --device npu \ + --gpu 0 \ + --pretrained \ + --pth_path ${pth_path} \ + --batch-size ${batch_size} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh new file mode 100644 index 0000000000..c85a4a83bf --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 16\ + --arch eca_resnet50 \ + --epochs 100 \ + --data ${data_path} \ + --ksize 3557 \ + --device npu \ + --gpu 0 \ + --batch-size ${batch_size} \ + --lr 0.1 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 
+ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh new file mode 100644 index 0000000000..bc11125119 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --epochs 100 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --lr 0.8 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh new file mode 100644 index 0000000000..639606b904 
--- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 16\ + --arch eca_resnet50 \ + --epochs 1 \ + --data ${data_path} \ + --ksize 3557 \ + --device npu \ + --gpu 0 \ + --batch-size ${batch_size} \ + --lr 0.1 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = 
${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh new file mode 100644 index 0000000000..3c17675d9c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 + +# 训练epoch +train_epochs=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --lr 0.8 \ + --epochs ${train_epochs} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 590c20877b2392d109dcd78a989b88c1a05e3e8a Mon Sep 17 00:00:00 2001 From: celianguai <985261217@qq.com> Date: Fri, 18 Nov 2022 13:04:36 +0800 Subject: [PATCH 2/4] 
=?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E8=84=9A=E6=9C=AC=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ECANet/test/train_eval_8p.sh | 30 +++++++++++++++---- .../ECANet/test/train_finetune_1p.sh | 2 +- .../ECANet/test/train_full_1p.sh | 4 +-- .../ECANet/test/train_full_8p.sh | 4 +-- .../ECANet/test/train_performance_1p.sh | 4 +-- .../ECANet/test/train_performance_8p.sh | 6 ++-- 6 files changed, 34 insertions(+), 16 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh index 4c46f099ce..bf484f5ace 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh @@ -78,6 +78,7 @@ nohup python3.7 main.py \ --arch eca_resnet50 \ --data ${data_path} \ --ksize 3557 \ + --lr 0.8 \ --multiprocessing_distributed \ --device npu \ --batch-size ${batch_size} \ @@ -89,28 +90,43 @@ nohup python3.7 main.py \ wait + ##################获取训练数据################ -# 训练结束时间,不需要修改 + +#训练结束时间,不需要修改 end_time=$(date +%s) e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" -# 训练用例信息,不需要修改 +#性能看护结果汇总 +#训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -# 最后一个迭代loss值,不需要修改 -ActualLoss=`grep Test ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log | awk '{print $8}' | awk 'END {print}'` +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` #关键信息打印到${CaseName}.log中,不需要修改 echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log @@ -118,6 +134,8 @@ echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${C echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh index 96ff1ee826..dc69f28035 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh @@ -116,7 +116,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh index c85a4a83bf..c4361aa57b 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh index bc11125119..a0e98a2e4f 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final 
Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh index 639606b904..1de4f84e67 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh index 3c17675d9c..c802c075bc 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -94,7 +94,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' 
${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -112,7 +112,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` @@ -127,4 +127,4 @@ echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${ echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From 4ab44e75ea0186dda12d75ce19202c47ceda35b2 Mon Sep 17 00:00:00 2001 From: 15972670948 <943012535@qq.com> Date: Sat, 10 Dec 2022 05:42:59 -0500 Subject: [PATCH 3/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E8=84=9A=E6=9C=AC=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../contrib/cv/classification/ECANet/main.py | 10 ++-------- .../ECANet/test/train_full_8p.sh | 19 +++++++++++++------ .../ECANet/test/train_performance_8p.sh | 14 ++++++++++---- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/main.py b/PyTorch/contrib/cv/classification/ECANet/main.py index c8a3c99c57..7be5661e0d 100644 --- a/PyTorch/contrib/cv/classification/ECANet/main.py +++ b/PyTorch/contrib/cv/classification/ECANet/main.py @@ -160,13 +160,7 @@ def main(): else: ngpus_per_node = 1 print('ngpus_per_node:', ngpus_per_node) - - if args.multiprocessing_distributed: - args.world_size = ngpus_per_node * args.world_size - mp.spawn(main_worker, nprocs=ngpus_per_node, - args=(ngpus_per_node, args)) - else: - main_worker(args.gpu, ngpus_per_node, args) + main_worker(args.gpu, ngpus_per_node, args) def main_worker(gpu, ngpus_per_node, args): @@ -175,7 +169,7 @@ def main_worker(gpu, ngpus_per_node, args): if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) - + args.world_size = int(os.environ["WORLD_SIZE"]) if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) diff --git 
a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh index a0e98a2e4f..c8a1bcced9 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -8,6 +8,8 @@ Network="ECANet" batch_size=2048 # 训练使用的npu卡数 export RANK_SIZE=8 +export WORLD_SIZE=8 +# 训练epoch data_path_info=$1 data_path=`echo ${data_path_info#*=}` @@ -62,10 +64,14 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -nohup python3.7 main.py \ - --workers 184\ +for((RANK_ID=0;RANK_ID<8;RANK_ID++)) +do + KERNEL_NUM=$(($(nproc)/$RANK_SIZE)) + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM -1 )) + nohup taskset -c $PID_START-$PID_END python3.7 main.py \ + --workers 184 \ --arch eca_resnet50 \ - --epochs 100 \ --data ${data_path} \ --ksize 3557 \ --multiprocessing_distributed \ @@ -73,8 +79,9 @@ nohup python3.7 main.py \ --batch-size ${batch_size} \ --lr 0.8 \ --amp \ + --gpu ${RANK_ID} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - +done wait ##################获取训练数据################ @@ -109,7 +116,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` @@ -124,4 +131,4 @@ echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${ echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh index c802c075bc..53dd65b2ec 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -8,7 +8,7 @@ Network="ECANet" batch_size=2048 # 训练使用的npu卡数 export RANK_SIZE=8 - +export WORLD_SIZE=8 # 训练epoch train_epochs=1 @@ -65,8 +65,13 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -nohup python3.7 main.py \ - --workers 184\ +for((RANK_ID=0;RANK_ID<8;RANK_ID++)) +do + KERNEL_NUM=$(($(nproc)/$RANK_SIZE)) + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM -1 )) + nohup taskset -c $PID_START-$PID_END python3.7 main.py \ + --workers 184 \ --arch eca_resnet50 \ --data ${data_path} \ --ksize 3557 \ @@ -76,8 +81,9 @@ nohup python3.7 main.py \ --lr 0.8 \ --epochs 
${train_epochs} \ --amp \ + --gpu ${RANK_ID} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - +done wait ##################获取训练数据################ -- Gitee From 7f0b702addfb7eb3010ce6a3a714697f35b8c6dd Mon Sep 17 00:00:00 2001 From: 15972670948 <943012535@qq.com> Date: Mon, 12 Dec 2022 03:04:55 -0500 Subject: [PATCH 4/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--readme=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/ECANet/README.md | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/README.md b/PyTorch/contrib/cv/classification/ECANet/README.md index 047fb2b0c7..df7e463b89 100644 --- a/PyTorch/contrib/cv/classification/ECANet/README.md +++ b/PyTorch/contrib/cv/classification/ECANet/README.md @@ -1,10 +1,10 @@ # ECANet for PyTorch -- [概述](概述.md) -- [准备训练环境](准备训练环境.md) -- [开始训练](开始训练.md) -- [训练结果展示](训练结果展示.md) -- [版本说明](版本说明.md) +- [概述] +- [准备训练环境] +- [开始训练] +- [训练结果展示] +- [版本说明] @@ -18,6 +18,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 ``` url=https://github.com/BangguWu/ECANet.git + commit_id=b332f6b3e6e2afe8a3287dc8ee8440a0fbec74c4 ``` - 适配昇腾 AI 处理器的实现: @@ -30,8 +31,8 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 - 通过Git获取代码方法如下: ``` - git clone https://gitee.com/celianguai/ModelZoo-PyTorch.git # 克隆仓库的代码 - cd ./ModelZoo-PyTorch/PyTorch/contrib/cv/classification/ECANet # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 ``` - 通过单击“立即下载”,下载源码包。 @@ -46,15 +47,16 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 | 配套 | 版本 | | ---------- | ------------------------------------------------------------ | - | 固件与驱动 | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | - | CANN | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) | - | PyTorch | [1.5.0](https://gitee.com/ascend/pytorch/tree/v1.5.0/) | + | 硬件 | [1.0.17](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | + | NPU固件与驱动 | [6.0.RC1](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | + | CANN | [6.0.RC1](https://www.hiascend.com/software/cann/commercial?version=6.0.RC1) | + | PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) | - 环境准备指导。 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 -- 安装依赖(根据模型需求,按需添加所需依赖)。 +- 安装依赖。 ``` pip install -r requirements.txt @@ -63,9 +65,9 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 ## 准备数据集 -1. 获取数据集。 - 请用户自行准备好数据集,可选用ImageNet-1K数据集,包含train和val两部分,数据集目录结构参考如下所示。 + + 请用户自行准备好数据集,可选用ImageNet-1K数据集,将准备好的数据集解压放至服务器的任意目录下,包含train和val两部分,数据集目录结构参考如下所示。 ``` ├── ImageNet @@ -90,8 +92,6 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 │ ... ``` -2. 数据预处理(按需处理所需要的数据集)。 - # 开始训练 @@ -100,7 +100,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 1. 进入解压后的源码包根目录。 ``` - cd ./ECANet + cd /${模型文件夹名称} ``` 2. 
运行训练脚本。 @@ -109,7 +109,6 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 - 单机单卡训练 - 启动单卡训练。 ``` # training 1p accuracy @@ -119,12 +118,11 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 bash ./test/train_performance_1p.sh --data_path=real_data_path # finetuning 1p - bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash ./test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path ``` - 单机8卡训练 - 启动8卡训练。 ``` # training 8p accuracy @@ -134,12 +132,12 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 bash ./test/train_performance_8p.sh --data_path=real_data_path #test 8p accuracy - bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash ./test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path ``` - --data_path参数填写数据集路径 --pth_path参数填写预训练模型路径 + 其中 :--data_path参数填写数据集的真实路径; --pth_path参数填写训练中保存的参数文件的真实路径。 - 模型训练脚本参数说明如下。 + 模型训练脚本参数说明如下: ``` 公共参数: @@ -170,9 +168,9 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 | NAME | Acc@1 | FPS | Epochs | AMP_Type | | ------- | ----- | ---: | ------ | -------: | | 1p-竞品 | - | 610 | 1 | - | -| 1p-NPU | - | 911 | 1 | O2 | +| 1p-NPU | - | 774.04 | 1 | O2 | | 8p-竞品 | 77.91 | 4200 | 100 | - | -| 8p-NPU | 78.30 | 6450 | 100 | O2 | +| 8p-NPU | 77.73 | 6924.75 | 100 | O2 | # 版本说明 @@ -181,6 +179,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 2022.09.14:首次发布。 + ## 已知问题 -- Gitee
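A minimal usage sketch of the eca_layer module introduced in patch 1/4 (this sketch is editorial and not part of the patches): it assumes the working directory is the ECANet model root created by the patch, so that models/eca_module.py is importable, and the batch size, channel count (64) and spatial size are arbitrary example values.

import torch
from models.eca_module import eca_layer

# Example feature map: batch of 2, 64 channels, 32x32 spatial resolution.
x = torch.randn(2, 64, 32, 32)

# k_size=3 matches the module's default 1D-convolution kernel size.
eca = eca_layer(channel=64, k_size=3)

# The module global-average-pools the input, runs a 1D convolution across
# the channel dimension, applies a sigmoid gate, and rescales the input
# channel-wise, so the output shape equals the input shape.
y = eca(x)
print(y.shape)  # torch.Size([2, 64, 32, 32])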