From c9e409f69c785a91e7b2e411ac2d77e6ddecfe42 Mon Sep 17 00:00:00 2001 From: celianguai <985261217@qq.com> Date: Wed, 14 Sep 2022 20:17:24 +0800 Subject: [PATCH 1/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/ECANet/.gitignore | 2 + .../contrib/cv/classification/ECANet/LICENSE | 52 ++ .../cv/classification/ECANet/README.md | 187 +++++ .../cv/classification/ECANet/light_main.py | 434 ++++++++++++ .../contrib/cv/classification/ECANet/main.py | 662 ++++++++++++++++++ .../classification/ECANet/models/__init__.py | 12 + .../ECANet/models/eca_mobilenetv2.py | 139 ++++ .../ECANet/models/eca_module.py | 39 ++ .../cv/classification/ECANet/models/eca_ns.py | 32 + .../ECANet/models/eca_resnet.py | 220 ++++++ .../classification/ECANet/modelzoo_level.txt | 3 + .../cv/classification/ECANet/paras_flops.py | 65 ++ .../cv/classification/ECANet/requirements.txt | 3 + .../cv/classification/ECANet/test/env_npu.sh | 76 ++ .../ECANet/test/train_eval_8p.sh | 123 ++++ .../ECANet/test/train_finetune_1p.sh | 152 ++++ .../ECANet/test/train_full_1p.sh | 127 ++++ .../ECANet/test/train_full_8p.sh | 127 ++++ .../ECANet/test/train_performance_1p.sh | 127 ++++ .../ECANet/test/train_performance_8p.sh | 130 ++++ 20 files changed, 2712 insertions(+) create mode 100644 PyTorch/contrib/cv/classification/ECANet/.gitignore create mode 100644 PyTorch/contrib/cv/classification/ECANet/LICENSE create mode 100644 PyTorch/contrib/cv/classification/ECANet/README.md create mode 100644 PyTorch/contrib/cv/classification/ECANet/light_main.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/main.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/__init__.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_module.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt create mode 100644 PyTorch/contrib/cv/classification/ECANet/paras_flops.py create mode 100644 PyTorch/contrib/cv/classification/ECANet/requirements.txt create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh create mode 100644 PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh diff --git a/PyTorch/contrib/cv/classification/ECANet/.gitignore b/PyTorch/contrib/cv/classification/ECANet/.gitignore new file mode 100644 index 0000000000..f53d18e061 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +.vscode diff --git a/PyTorch/contrib/cv/classification/ECANet/LICENSE b/PyTorch/contrib/cv/classification/ECANet/LICENSE new file mode 100644 index 0000000000..7970c97fb4 --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/ECANet/LICENSE @@ -0,0 +1,52 @@ +MIT License + +Copyright (c) 2019 BangguWu, Qilong Wang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +BSD 3-Clause License + +Copyright (c) 2020, BangguWu +All rights reserved. +Copyright 2022 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/PyTorch/contrib/cv/classification/ECANet/README.md b/PyTorch/contrib/cv/classification/ECANet/README.md
new file mode 100644
index 0000000000..047fb2b0c7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ECANet/README.md
@@ -0,0 +1,187 @@
+# ECANet for PyTorch
+
+- [Overview](概述.md)
+- [Preparing the Training Environment](准备训练环境.md)
+- [Starting Training](开始训练.md)
+- [Training Results](训练结果展示.md)
+- [Release Notes](版本说明.md)
+
+
+
+# Overview
+
+## Summary
+
+ECANet is an efficient image classification network. It adds the Efficient Channel Attention (ECA) module on top of a residual network, avoiding the degradation of channel attention caused by dimensionality reduction. ECANet is built on ResNet-50 with ECABottleneck blocks; the module introduces only a handful of extra parameters while bringing a clear accuracy gain.
+
+- Reference implementation:
+
+  ```
+  url=https://github.com/BangguWu/ECANet.git
+  ```
+
+- Implementation adapted for Ascend AI processors:
+
+  ```
+  url=https://gitee.com/ascend/ModelZoo-PyTorch.git
+  code_path=PyTorch/contrib/cv/classification
+  ```
+
+- To obtain the code via Git:
+
+  ```
+  git clone https://gitee.com/celianguai/ModelZoo-PyTorch.git       # clone the repository
+  cd ./ModelZoo-PyTorch/PyTorch/contrib/cv/classification/ECANet    # switch to the model directory; not needed if the repository only contains this model
+  ```
+
+- Alternatively, click "Download Now" to download the source package.
+
+# Preparing the Training Environment
+
+## Preparing the Environment
+
+- The firmware/driver, CANN, and PyTorch versions supported by this model are listed below.
+
+  **Table 1** Version compatibility
+
+  | Component         | Version                                                      |
+  | ----------------- | ------------------------------------------------------------ |
+  | Firmware & driver | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+  | CANN              | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) |
+  | PyTorch           | [1.5.0](https://gitee.com/ascend/pytorch/tree/v1.5.0/)       |
+
+- Environment setup guide.
+
+  See [Preparing a PyTorch Training Environment](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes).
+
+- Install the dependencies (add packages as required by the model).
+
+  ```
+  pip install -r requirements.txt
+  ```
+
+
+## Preparing the Dataset
+
+1. Obtain the dataset.
+
+   Prepare the dataset yourself; ImageNet-1K is a suitable choice. It must contain a train part and a val part, organized as shown below (a short usage sketch follows this list).
+
+   ```
+   ├── ImageNet
+        ├──train
+             ├──class1
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──class2
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──...
+        ├──val
+             ├──class1
+                  │──image1
+                  │──image2
+                  │   ...
+             ├──class2
+                  │──image1
+                  │──image2
+                  │   ...
+   ```
+
+2. Data preprocessing (process the dataset as needed).
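+
+   The training scripts read this layout with `torchvision.datasets.ImageFolder`, which maps each class sub-folder to a label index. A minimal sketch of how the layout is consumed (the `/path/to/ImageNet` path is only a placeholder):
+
+   ```
+   import torchvision.datasets as datasets
+   import torchvision.transforms as transforms
+
+   # every sub-folder of train/ (or val/) becomes one class
+   train_dataset = datasets.ImageFolder(
+       "/path/to/ImageNet/train",
+       transforms.Compose([
+           transforms.RandomResizedCrop(224),
+           transforms.RandomHorizontalFlip(),
+           transforms.ToTensor(),
+       ]))
+   print(len(train_dataset.classes))   # number of classes found, e.g. 1000
+   ```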
+
+
+# Start Training
+
+## Training the Model
+
+1. Go to the root directory of the extracted source package.
+
+   ```
+   cd ./ECANet
+   ```
+
+2. Run the training scripts.
+
+   The model supports single-device (1p) and single-node 8-device (8p) training.
+
+   - Single-device training
+
+     Start 1p training.
+
+     ```
+     # training 1p accuracy
+     bash ./test/train_full_1p.sh --data_path=real_data_path
+
+     # training 1p performance
+     bash ./test/train_performance_1p.sh --data_path=real_data_path
+
+     # finetuning 1p
+     bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
+     ```
+
+   - Single-node 8-device training
+
+     Start 8p training.
+
+     ```
+     # training 8p accuracy
+     bash ./test/train_full_8p.sh --data_path=real_data_path
+
+     # training 8p performance
+     bash ./test/train_performance_8p.sh --data_path=real_data_path
+
+     # test 8p accuracy
+     bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
+     ```
+
+   --data_path takes the dataset path; --pth_path takes the pretrained model path.
+
+   The training script parameters are described below.
+
+   ```
+   Common parameters:
+   --arch                              //model architecture, default: eca_resnet50
+   --ksize                             //ECA kernel sizes, default: 3557
+   --workers                           //number of data-loading workers
+   --data                              //dataset path
+   --device                            //training device type
+   --gpu                               //device id to use
+   --epochs                            //number of training epochs
+   --batch-size                        //training batch size
+   --lr                                //initial learning rate, default: 0.1
+   --resume                            //checkpoint path for resuming an interrupted run
+   --pretrained                        //pretrained model path
+   --amp                               //whether to use mixed precision
+
+   Multi-device parameters:
+   --multiprocessing-distributed       //whether to use multi-device training
+   --device-list '0,1,2,3,4,5,6,7'     //devices to use for multi-device training
+   ```
+
+   After training finishes, the checkpoint files are saved in the current directory, and the accuracy and performance of the run are printed.
+
+# Training Results
+
+**Table 2** Training results
+
+| NAME          | Acc@1 |  FPS | Epochs | AMP_Type |
+| ------------- | ----- | ---: | ------ | -------: |
+| 1p-competitor | -     |  610 | 1      |        - |
+| 1p-NPU        | -     |  911 | 1      |       O2 |
+| 8p-competitor | 77.91 | 4200 | 100    |        - |
+| 8p-NPU        | 78.30 | 6450 | 100    |       O2 |
+
+# Release Notes
+
+## Changes
+
+
+2022.09.14: initial release.
+
+## Known Issues
+
+
+None.
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/ECANet/light_main.py b/PyTorch/contrib/cv/classification/ECANet/light_main.py
new file mode 100644
index 0000000000..13df143e74
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/ECANet/light_main.py
@@ -0,0 +1,434 @@
+# Copyright (c) 2020, Banggu Wu
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
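+#
+# light_main.py is a lightweight training entry kept close to the reference
+# implementation: plain SGD training of a model from models/ on an
+# ImageFolder-style dataset, with params/FLOPs reported via thop. The
+# NPU-adapted entry with apex AMP and distributed support is main.py.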
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import argparse +import os +import random +import shutil +import time +import warnings + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import models +from thop import profile + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=400, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=96, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--lr', '--learning-rate', default=0.045, type=float, + metavar='LR', help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--weight-decay', '--wd', default=4e-5, type=float, + metavar='W', help='weight decay (default: 1e-4)') +parser.add_argument('--print-freq', '-p', default=500, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='gloo', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--action', default='', type=str, + help='other information.') + + +best_prec1 = 0 + + +def main(): + global args, best_prec1 + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + args.distributed = args.world_size > 1 + + if args.distributed: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size) + + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + + if args.gpu is not None: + model = model.cuda(args.gpu) + elif args.distributed: + model.cuda() + model = torch.nn.parallel.DistributedDataParallel(model) + + else: + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + print(model) + + # get the number of models parameters + print('Number of models parameters: {}'.format( + sum([p.data.nelement() for p in model.parameters()]))) + with torch.cuda.device(0): + net = models.__dict__[args.arch]() + # flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True, print_per_layer_stat=True) + flops, params = profile(net, inputs=(torch.randn(1,3,224,224),)) + print('Flops: ' + str(flops)) + print('Params: ' + str(params)) + del net, flops, params + + # define loss function (criterion) and optimizer + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume) + args.start_epoch = checkpoint['epoch'] + best_prec1 = checkpoint['best_prec1'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + del checkpoint + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + if args.evaluate: + m = time.time() + _, _ =validate(val_loader, model, criterion) + n = time.time() + print((n-m)/3600) + return + + directory = "runs/%s/"%(args.arch + '_' + args.action) + if not os.path.exists(directory): + os.makedirs(directory) + + Loss_plot = {} + train_prec1_plot = {} + train_prec5_plot = {} + val_prec1_plot = {} + val_prec5_plot = {} + + for epoch 
in range(args.start_epoch, args.epochs): + start_time = time.time() + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(optimizer, epoch) + + # train for one epoch + # train(train_loader, model, criterion, optimizer, epoch) + loss_temp, train_prec1_temp, train_prec5_temp = train(train_loader, model, criterion, optimizer, epoch) + Loss_plot[epoch] = loss_temp + train_prec1_plot[epoch] = train_prec1_temp + train_prec5_plot[epoch] = train_prec5_temp + + # evaluate on validation set + # prec1 = validate(val_loader, model, criterion) + prec1, prec5 = validate(val_loader, model, criterion) + val_prec1_plot[epoch] = prec1 + val_prec5_plot[epoch] = prec5 + + # remember best prec@1 and save checkpoint + is_best = prec1 > best_prec1 + best_prec1 = max(prec1, best_prec1) + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + }, is_best) + + # 将Loss,train_prec1,train_prec5,val_prec1,val_prec5用.txt的文件存起来 + data_save(directory + 'Loss_plot.txt', Loss_plot) + data_save(directory + 'train_prec1.txt', train_prec1_plot) + data_save(directory + 'train_prec5.txt', train_prec5_plot) + data_save(directory + 'val_prec1.txt', val_prec1_plot) + data_save(directory + 'val_prec5.txt', val_prec5_plot) + + end_time = time.time() + time_value = (end_time - start_time) / 3600 + print("-" * 80) + print(time_value) + print("-" * 80) + + +def train(train_loader, model, criterion, optimizer, epoch): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + losses_batch = {} + # switch to train mode + model.train() + + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Epoch: [{0}][{1}/{2}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + data_time=data_time, loss=losses, top1=top1, top5=top5)) + losses_batch[epoch] = losses.avg + loss_batch = open("loss_batch.txt", 'a') + for line in losses_batch: + loss_batch.write(str(epoch) + " " + str(line) + " " + str(losses_batch[line]) + '\n') + loss_batch.close() + + return losses.avg, top1.avg, top5.avg + + +def validate(val_loader, model, criterion): + batch_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if args.gpu is not None: + input = input.cuda(args.gpu, non_blocking=True) + target = 
target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}/{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, len(val_loader), batch_time=batch_time, loss=losses, + top1=top1, top5=top5)) + + print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg, top5.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + directory = "runs/%s/"%(args.arch + '_' + args.action) + + filename = directory + filename + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, directory + 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def adjust_learning_rate(optimizer, epoch): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + # lr = args.lr * (0.1 ** (epoch // 30)) + lr = args.lr * (0.98 ** epoch) + print('lr = ', lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def data_save(root, file): + if not os.path.exists(root): + os.mknod(root) + file_temp = open(root, 'r') + lines = file_temp.readlines() + if not lines: + epoch = -1 + else: + epoch = lines[-1][:lines[-1].index(' ')] + epoch = int(epoch) + file_temp.close() + file_temp = open(root, 'a') + for line in file: + if line > epoch: + file_temp.write(str(line) + " " + str(file[line]) + '\n') + file_temp.close() + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/main.py b/PyTorch/contrib/cv/classification/ECANet/main.py new file mode 100644 index 0000000000..c8a3c99c57 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/main.py @@ -0,0 +1,662 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
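+#
+# main.py is the training entry adapted for Ascend NPU: it supports 1p and
+# multi-process distributed training (--multiprocessing_distributed /
+# --device_list), mixed precision through apex AMP (--amp), and profiling
+# (--prof).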
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import argparse +import os +from os.path import exists, join, split +import random +import shutil +import time +import warnings +import json +import logging +import apex +from apex import amp +import threading +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import models + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data', metavar='DIR', + help='path to dataset') +parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=100, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)') +parser.add_argument('--print-freq', '-p', default=50, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pth_path', default='', type=str, metavar='PATH', + help='path to pretrained checkpoint (default: none)') +parser.add_argument('--world_size', default=1, type=int, + help='number of distributed processes') +parser.add_argument('--rank',default=0,type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://127.0.0.1:9278', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='hccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, nargs='+', + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing_distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--num_classes', default=1000, type=int, + help='The number of classes.') +parser.add_argument('--ksize', default=None, type=list, + help='Manually select the eca module kernel size') +parser.add_argument('--action', default='', type=str, + help='other information.') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--profling', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--device', default='gpu', type=str, help='npu or gpu') +parser.add_argument('--addr', default='127.0.0.1', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--warm_up_epochs', default=5, type=int, + help='warm up') + + +best_prec1=0 + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + global args + args = parser.parse_args() + print(args.device_list) + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29998' + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + + if args.multiprocessing_distributed: + args.world_size = ngpus_per_node * args.world_size + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + main_worker(args.gpu, ngpus_per_node, args) + +def main_worker(gpu, ngpus_per_node, args): + + global best_prec1 + args.gpu = args.process_device_map[gpu] + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + args.rank = args.rank * ngpus_per_node + gpu + + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](k_size=args.ksize, pretrained=True) + print("loading model of yours...") + if args.pth_path: + print("load pth you give") + pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] + else: + pretrained_dict = torch.load("./model_best.pth.tar", map_location="cpu")["state_dict"] + + if "fc.weight" in pretrained_dict: + print("pop fc layer weight") + pretrained_dict.pop('fc.weight') + pretrained_dict.pop('fc.bias') + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model '{}'".format(args.arch)) + if args.ksize == None: + model = models.__dict__[args.arch]() + else: + model = models.__dict__[args.arch](k_size=args.ksize) + + if args.distributed: + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + model.cuda() + + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + 
optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + + if args.distributed: + if args.gpu is not None: + + if args.pretrained: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, + find_unused_parameters=True) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + else: + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + else: + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = torch.nn.DataParallel(model).to(loc) + else: + model = torch.nn.DataParallel(model).cuda() + + print(model) + + # get the number of models parameters + print('Number of models parameters: {}'.format( + sum([p.data.nelement() for p in model.parameters()]))) + + # define loss function (criterion) and optimizer + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + criterion = nn.CrossEntropyLoss().to(loc) + else: + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_prec1 = checkpoint['best_prec1'] + if args.gpu is not None: + best_prec1 = best_prec1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, 
pin_memory=True) + + if args.evaluate: + m = time.time() + _, _ =validate(val_loader, model, criterion) + n = time.time() + print((n-m)/3600) + return + + if args.prof: + profiling(train_loader, model, criterion, optimizer, args) + return + + if args.profling: + cann_profiling(train_loader, model, criterion, optimizer, args) + return + + FPS = 0 + print("===> Training") + for epoch in range(args.start_epoch, args.epochs): + start_time = time.time() + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + epoch_FPS = train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) + + # evaluate on validation set + prec1 = validate(val_loader, model, criterion, args, ngpus_per_node) + + # remember best prec@1 and save checkpoint + is_best = prec1 > best_prec1 + best_prec1 = max(prec1, best_prec1) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer' : optimizer.state_dict(), + }, is_best) + +#print profile +def profiling(train_loader, model, criterion, optimizer, args): + # switch to train mode + model.train() + + def update(model, input, target, optimizer): + output = model(input) + loss = criterion(output, target) + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + for step, (input, target) in enumerate(train_loader): + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + if step < 5: + update(model, input, target, optimizer) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update(model, input, target, optimizer) + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update(model, input, target, optimizer) + break + + prof.export_chrome_trace("output.prof") + +def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':6.4f') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + # switch to train mode + model.train() + + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + input = input.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record 
loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + if args.device == 'npu': + torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if batch_time.avg: + print("[gpu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + + epoch_FPS = args.batch_size * args.world_size / batch_time.avg + print(f"train_one_epoch FPS: {epoch_FPS}") + return epoch_FPS + + +def validate(val_loader, model, criterion, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':6.4f') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + input = input.to(loc).to(torch.float) + else: + input = input.cuda(args.gpu, non_blocking=True) + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > 
(self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/models/__init__.py b/PyTorch/contrib/cv/classification/ECANet/models/__init__.py new file mode 100644 index 0000000000..52bcde7185 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +from .eca_resnet import * +from .eca_mobilenetv2 import * \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py new file mode 100644 index 0000000000..01a4dd8d63 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_mobilenetv2.py @@ -0,0 +1,139 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
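+#
+# MobileNetV2 backbone with an eca_layer appended to every inverted residual
+# block; typical construction is eca_mobilenet_v2(num_classes=1000).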
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +from torch import nn +from .eca_module import eca_layer + +__all__ = ['ECA_MobileNetV2', 'eca_mobilenet_v2'] + + +model_urls = { + 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', +} + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio, k_size): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + layers.append(eca_layer(oup, k_size)) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class ECA_MobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.0): + super(ECA_MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = int(input_channel * width_mult) + self.last_channel = int(last_channel * max(1.0, width_mult)) + features = [ConvBNReLU(3, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if c < 96: + ksize = 1 + else: + ksize = 3 + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t, k_size=ksize)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(0.25), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = x.mean(-1).mean(-1) + x = self.classifier(x) + return x + + +def eca_mobilenet_v2(pretrained=False, progress=True, **kwargs): + """ + Constructs a ECA_MobileNetV2 architecture from + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model 
= ECA_MobileNetV2(**kwargs) + # if pretrained: + # state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], + # progress=progress) + # model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py new file mode 100644 index 0000000000..0312550059 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_module.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +from torch import nn +from torch.nn.parameter import Parameter + +class eca_layer(nn.Module): + """Constructs a ECA module. + + Args: + channel: Number of channels of the input feature map + k_size: Adaptive selection of kernel size + """ + def __init__(self, channel, k_size=3): + super(eca_layer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # feature descriptor on the global spatial information + y = self.avg_pool(x) + + # Two different branches of ECA module + y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) + + # Multi-scale information fusion + y = self.sigmoid(y) + + return x * y.expand_as(x) + diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py new file mode 100644 index 0000000000..aefd239d56 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_ns.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +import time +from torch import nn + + +class eca_layer(nn.Module): + def __init__(self, channel, k_size): + super(eca_layer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.k_size = k_size + self.conv = nn.Conv1d(channel, channel, kernel_size=k_size, bias=False, groups=channel) + self.sigmoid = nn.Sigmoid() + + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x) + y = nn.functional.unfold(y.transpose(-1, -3), kernel_size=(1, self.k_size), padding=(0, (self.k_size - 1) // 2)) + y = self.conv(y.transpose(-1, -2)).unsqueeze(-1) + y = self.sigmoid(y) + x = x * y.expand_as(x) + return x diff --git a/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py b/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py new file mode 100644 index 0000000000..e5ee62b459 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/models/eca_resnet.py @@ -0,0 +1,220 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
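+#
+# ResNet-18/34/50/101/152 variants built from ECABasicBlock / ECABottleneck,
+# each of which applies an eca_layer to the residual branch before the skip
+# connection; k_size picks the ECA kernel size per stage, e.g.
+#     model = eca_resnet50(k_size=[3, 5, 5, 7], num_classes=1000)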
+# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch.nn as nn +import math +# import torch.utils.model_zoo as model_zoo +from .eca_module import eca_layer + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class ECABasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3): + super(ECABasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, 1) + self.bn2 = nn.BatchNorm2d(planes) + self.eca = eca_layer(planes, k_size) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.eca(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ECABottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3): + super(ECABottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.eca = eca_layer(planes * 4, k_size) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + out = self.eca(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, k_size=[3, 3, 3, 3]): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0], int(k_size[0])) + self.layer2 = self._make_layer(block, 128, layers[1], int(k_size[1]), stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], int(k_size[2]), stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], int(k_size[3]), stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, k_size, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, k_size)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, k_size=k_size)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def eca_resnet18(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-18 model. + + Args: + k_size: Adaptive selection of kernel size + pretrained (bool): If True, returns a model pre-trained on ImageNet + num_classes:The classes of classification + """ + model = ResNet(ECABasicBlock, [2, 2, 2, 2], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet34(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-34 model. + + Args: + k_size: Adaptive selection of kernel size + pretrained (bool): If True, returns a model pre-trained on ImageNet + num_classes:The classes of classification + """ + model = ResNet(ECABasicBlock, [3, 4, 6, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet50(k_size=[3, 3, 3, 3], num_classes=1000, pretrained=False): + """Constructs a ResNet-50 model. + + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + print("Constructing eca_resnet50......") + model = ResNet(ECABottleneck, [3, 4, 6, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet101(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-101 model. + + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(ECABottleneck, [3, 4, 23, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def eca_resnet152(k_size=[3, 3, 3, 3], num_classes=1_000, pretrained=False): + """Constructs a ResNet-152 model. 
+ + Args: + k_size: Adaptive selection of kernel size + num_classes:The classes of classification + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(ECABottleneck, [3, 8, 36, 3], num_classes=num_classes, k_size=k_size) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model diff --git a/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt new file mode 100644 index 0000000000..0b49b4fb26 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/modelzoo_level.txt @@ -0,0 +1,3 @@ +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/paras_flops.py b/PyTorch/contrib/cv/classification/ECANet/paras_flops.py new file mode 100644 index 0000000000..9665a6a4a0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/paras_flops.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020, Banggu Wu +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# -------------------------------------------------------- +# References: +# ECANet:https://github.com/BangguWu/ECANet +# -------------------------------------------------------- +import torch +# import test_models as models +from thop import profile +import torchvision +import models +import argparse + + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--arch', '-a', metavar='ARCH', default='eca_resnet50', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: eca_resnet50)') + +def main(): + global args + args = parser.parse_args() + model = models.__dict__[args.arch]() + print(model) + input = torch.randn(1, 3, 224, 224) + model.train() + # model.eval() + flops, params = profile(model, inputs=(input, )) + print("flops = ", flops) + print("params = ", params) + flops, params = clever_format([flops, params], "%.3f") + print("flops = ", flops) + print("params = ", params) + +def clever_format(nums, format="%.2f"): + clever_nums = [] + + for num in nums: + if num > 1e12: + clever_nums.append(format % (num / 1024 ** 4) + "T") + elif num > 1e9: + clever_nums.append(format % (num / 1024 ** 3) + "G") + elif num > 1e6: + clever_nums.append(format % (num / 1024 ** 2) + "M") + elif num > 1e3: + clever_nums.append(format % (num / 1024) + "K") + else: + clever_nums.append(format % num + "B") + + clever_nums = clever_nums[0] if len(clever_nums) == 1 else (*clever_nums, ) + + return clever_nums + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ECANet/requirements.txt b/PyTorch/contrib/cv/classification/ECANet/requirements.txt new file mode 100644 index 0000000000..5d97f76367 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/requirements.txt @@ -0,0 +1,3 @@ +numpy +apex +tqdm diff --git a/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh b/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh new file mode 100644 index 0000000000..4740fafdcc --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/env_npu.sh @@ -0,0 +1,76 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export 
LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error 
+export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh new file mode 100644 index 0000000000..4c46f099ce --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size resume RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` +# checkpoint文件路径,以实际路径为准 +pth_path="" + + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --resume ${pth_path} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`grep Test ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log | awk '{print $8}' | awk 'END {print}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh new file mode 100644 index 0000000000..96ff1ee826 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh @@ -0,0 +1,152 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +# 数据集路径,保持为空,不需要修改 +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` +# checkpoint文件路径,以实际路径为准 +pth_path="" + +device_id=0 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ 
$data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --workers 16 \ + --lr 0.1 \ + --device npu \ + --gpu 0 \ + --pretrained \ + --pth_path ${pth_path} \ + --batch-size ${batch_size} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh new file mode 100644 index 0000000000..c85a4a83bf --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 16\ + --arch eca_resnet50 \ + --epochs 100 \ + --data ${data_path} \ + --ksize 3557 \ + --device npu \ + --gpu 0 \ + --batch-size ${batch_size} \ + --lr 0.1 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 
+ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh new file mode 100644 index 0000000000..bc11125119 --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --epochs 100 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --lr 0.8 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh new file mode 100644 index 0000000000..639606b904 
--- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=256 +# 训练使用的npu卡数 +export RANK_SIZE=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 16\ + --arch eca_resnet50 \ + --epochs 1 \ + --data ${data_path} \ + --ksize 3557 \ + --device npu \ + --gpu 0 \ + --batch-size ${batch_size} \ + --lr 0.1 \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = 
${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh new file mode 100644 index 0000000000..3c17675d9c --- /dev/null +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ECANet" +# 训练batch_size +batch_size=2048 +# 训练使用的npu卡数 +export RANK_SIZE=8 + +# 训练epoch +train_epochs=1 + +data_path_info=$1 +data_path=`echo ${data_path_info#*=}` + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +nohup python3.7 main.py \ + --workers 184\ + --arch eca_resnet50 \ + --data ${data_path} \ + --ksize 3557 \ + --multiprocessing_distributed \ + --device npu \ + --batch-size ${batch_size} \ + --lr 0.8 \ + --epochs ${train_epochs} \ + --amp \ + > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee From 590c20877b2392d109dcd78a989b88c1a05e3e8a Mon Sep 17 00:00:00 2001 From: celianguai <985261217@qq.com> Date: Fri, 18 Nov 2022 13:04:36 +0800 Subject: [PATCH 2/4] 
=?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E8=84=9A=E6=9C=AC=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ECANet/test/train_eval_8p.sh | 30 +++++++++++++++---- .../ECANet/test/train_finetune_1p.sh | 2 +- .../ECANet/test/train_full_1p.sh | 4 +-- .../ECANet/test/train_full_8p.sh | 4 +-- .../ECANet/test/train_performance_1p.sh | 4 +-- .../ECANet/test/train_performance_8p.sh | 6 ++-- 6 files changed, 34 insertions(+), 16 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh index 4c46f099ce..bf484f5ace 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_eval_8p.sh @@ -78,6 +78,7 @@ nohup python3.7 main.py \ --arch eca_resnet50 \ --data ${data_path} \ --ksize 3557 \ + --lr 0.8 \ --multiprocessing_distributed \ --device npu \ --batch-size ${batch_size} \ @@ -89,28 +90,43 @@ nohup python3.7 main.py \ wait + ##################获取训练数据################ -# 训练结束时间,不需要修改 + +#训练结束时间,不需要修改 end_time=$(date +%s) e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" -# 训练用例信息,不需要修改 +#性能看护结果汇总 +#训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -# 最后一个迭代loss值,不需要修改 -ActualLoss=`grep Test ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log | awk '{print $8}' | awk 'END {print}'` +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` #关键信息打印到${CaseName}.log中,不需要修改 echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log @@ -118,6 +134,8 @@ echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${C echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh index 96ff1ee826..dc69f28035 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_finetune_1p.sh @@ -116,7 +116,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh index c85a4a83bf..c4361aa57b 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_1p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh index bc11125119..a0e98a2e4f 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final 
Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh index 639606b904..1de4f84e67 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_1p.sh @@ -91,7 +91,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -109,7 +109,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh index 3c17675d9c..c802c075bc 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -94,7 +94,7 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' 
${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" @@ -112,7 +112,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Train: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` @@ -127,4 +127,4 @@ echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${ echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From 4ab44e75ea0186dda12d75ce19202c47ceda35b2 Mon Sep 17 00:00:00 2001 From: 15972670948 <943012535@qq.com> Date: Sat, 10 Dec 2022 05:42:59 -0500 Subject: [PATCH 3/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--=E8=84=9A=E6=9C=AC=E6=96=87=E4=BB=B6=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../contrib/cv/classification/ECANet/main.py | 10 ++-------- .../ECANet/test/train_full_8p.sh | 19 +++++++++++++------ .../ECANet/test/train_performance_8p.sh | 14 ++++++++++---- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/main.py b/PyTorch/contrib/cv/classification/ECANet/main.py index c8a3c99c57..7be5661e0d 100644 --- a/PyTorch/contrib/cv/classification/ECANet/main.py +++ b/PyTorch/contrib/cv/classification/ECANet/main.py @@ -160,13 +160,7 @@ def main(): else: ngpus_per_node = 1 print('ngpus_per_node:', ngpus_per_node) - - if args.multiprocessing_distributed: - args.world_size = ngpus_per_node * args.world_size - mp.spawn(main_worker, nprocs=ngpus_per_node, - args=(ngpus_per_node, args)) - else: - main_worker(args.gpu, ngpus_per_node, args) + main_worker(args.gpu, ngpus_per_node, args) def main_worker(gpu, ngpus_per_node, args): @@ -175,7 +169,7 @@ def main_worker(gpu, ngpus_per_node, args): if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) - + args.world_size = int(os.environ["WORLD_SIZE"]) if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) diff --git 
a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh index a0e98a2e4f..c8a1bcced9 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_full_8p.sh @@ -8,6 +8,8 @@ Network="ECANet" batch_size=2048 # 训练使用的npu卡数 export RANK_SIZE=8 +export WORLD_SIZE=8 +# 训练epoch data_path_info=$1 data_path=`echo ${data_path_info#*=}` @@ -62,10 +64,14 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -nohup python3.7 main.py \ - --workers 184\ +for((RANK_ID=0;RANK_ID<8;RANK_ID++)) +do + KERNEL_NUM=$(($(nproc)/$RANK_SIZE)) + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM -1 )) + nohup taskset -c $PID_START-$PID_END python3.7 main.py \ + --workers 184 \ --arch eca_resnet50 \ - --epochs 100 \ --data ${data_path} \ --ksize 3557 \ --multiprocessing_distributed \ @@ -73,8 +79,9 @@ nohup python3.7 main.py \ --batch-size ${batch_size} \ --lr 0.8 \ --amp \ + --gpu ${RANK_ID} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - +done wait ##################获取训练数据################ @@ -109,7 +116,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` @@ -124,4 +131,4 @@ echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${ echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh index c802c075bc..53dd65b2ec 100644 --- a/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/ECANet/test/train_performance_8p.sh @@ -8,7 +8,7 @@ Network="ECANet" batch_size=2048 # 训练使用的npu卡数 export RANK_SIZE=8 - +export WORLD_SIZE=8 # 训练epoch train_epochs=1 @@ -65,8 +65,13 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -nohup python3.7 main.py \ - --workers 184\ +for((RANK_ID=0;RANK_ID<8;RANK_ID++)) +do + KERNEL_NUM=$(($(nproc)/$RANK_SIZE)) + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM -1 )) + nohup taskset -c $PID_START-$PID_END python3.7 main.py \ + --workers 184 \ --arch eca_resnet50 \ --data ${data_path} \ --ksize 3557 \ @@ -76,8 +81,9 @@ nohup python3.7 main.py \ --lr 0.8 \ --epochs 
${train_epochs} \ --amp \ + --gpu ${RANK_ID} \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - +done wait ##################获取训练数据################ -- Gitee From 7f0b702addfb7eb3010ce6a3a714697f35b8c6dd Mon Sep 17 00:00:00 2001 From: 15972670948 <943012535@qq.com> Date: Mon, 12 Dec 2022 03:04:55 -0500 Subject: [PATCH 4/4] =?UTF-8?q?[=E8=A5=BF=E5=AE=89=E4=BA=A4=E9=80=9A?= =?UTF-8?q?=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTor?= =?UTF-8?q?ch][ECANet]--readme=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/ECANet/README.md | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/PyTorch/contrib/cv/classification/ECANet/README.md b/PyTorch/contrib/cv/classification/ECANet/README.md index 047fb2b0c7..df7e463b89 100644 --- a/PyTorch/contrib/cv/classification/ECANet/README.md +++ b/PyTorch/contrib/cv/classification/ECANet/README.md @@ -1,10 +1,10 @@ # ECANet for PyTorch -- [概述](概述.md) -- [准备训练环境](准备训练环境.md) -- [开始训练](开始训练.md) -- [训练结果展示](训练结果展示.md) -- [版本说明](版本说明.md) +- [概述] +- [准备训练环境] +- [开始训练] +- [训练结果展示] +- [版本说明] @@ -18,6 +18,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 ``` url=https://github.com/BangguWu/ECANet.git + commit_id=b332f6b3e6e2afe8a3287dc8ee8440a0fbec74c4 ``` - 适配昇腾 AI 处理器的实现: @@ -30,8 +31,8 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 - 通过Git获取代码方法如下: ``` - git clone https://gitee.com/celianguai/ModelZoo-PyTorch.git # 克隆仓库的代码 - cd ./ModelZoo-PyTorch/PyTorch/contrib/cv/classification/ECANet # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 ``` - 通过单击“立即下载”,下载源码包。 @@ -46,15 +47,16 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 | 配套 | 版本 | | ---------- | ------------------------------------------------------------ | - | 固件与驱动 | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | - | CANN | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) | - | PyTorch | [1.5.0](https://gitee.com/ascend/pytorch/tree/v1.5.0/) | + | 硬件 | [1.0.17](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | + | NPU固件与驱动 | [6.0.RC1](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | + | CANN | [6.0.RC1](https://www.hiascend.com/software/cann/commercial?version=6.0.RC1) | + | PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) | - 环境准备指导。 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 -- 安装依赖(根据模型需求,按需添加所需依赖)。 +- 安装依赖。 ``` pip install -r requirements.txt @@ -63,9 +65,9 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 ## 准备数据集 -1. 获取数据集。 - 请用户自行准备好数据集,可选用ImageNet-1K数据集,包含train和val两部分,数据集目录结构参考如下所示。 + + 请用户自行准备好数据集,可选用ImageNet-1K数据集,将准备好的数据集解压放至服务器的任意目录下,包含train和val两部分,数据集目录结构参考如下所示。 ``` ├── ImageNet @@ -90,8 +92,6 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 │ ... ``` -2. 数据预处理(按需处理所需要的数据集)。 - # 开始训练 @@ -100,7 +100,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 1. 进入解压后的源码包根目录。 ``` - cd ./ECANet + cd /${模型文件夹名称} ``` 2. 
运行训练脚本。 @@ -109,7 +109,6 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 - 单机单卡训练 - 启动单卡训练。 ``` # training 1p accuracy @@ -119,12 +118,11 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 bash ./test/train_performance_1p.sh --data_path=real_data_path # finetuning 1p - bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash ./test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path ``` - 单机8卡训练 - 启动8卡训练。 ``` # training 8p accuracy @@ -134,12 +132,12 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 bash ./test/train_performance_8p.sh --data_path=real_data_path #test 8p accuracy - bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash ./test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path ``` - --data_path参数填写数据集路径 --pth_path参数填写预训练模型路径 + 其中 :--data_path参数填写数据集的真实路径; --pth_path参数填写训练中保存的参数文件的真实路径。 - 模型训练脚本参数说明如下。 + 模型训练脚本参数说明如下: ``` 公共参数: @@ -170,9 +168,9 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 | NAME | Acc@1 | FPS | Epochs | AMP_Type | | ------- | ----- | ---: | ------ | -------: | | 1p-竞品 | - | 610 | 1 | - | -| 1p-NPU | - | 911 | 1 | O2 | +| 1p-NPU | - | 774.04 | 1 | O2 | | 8p-竞品 | 77.91 | 4200 | 100 | - | -| 8p-NPU | 78.30 | 6450 | 100 | O2 | +| 8p-NPU | 77.73 | 6924.75 | 100 | O2 | # 版本说明 @@ -181,6 +179,7 @@ ECANet是一个高效的图像分类网络。在残差网络上使用了高效 2022.09.14:首次发布。 + ## 已知问题 -- Gitee
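A minimal usage sketch of the eca_layer module introduced in patch 1/4 (this sketch is editorial and not part of the patches): it assumes the working directory is the ECANet model root created by the patch, so that models/eca_module.py is importable, and the batch size, channel count (64) and spatial size are arbitrary example values.

import torch
from models.eca_module import eca_layer

# Example feature map: batch of 2, 64 channels, 32x32 spatial resolution.
x = torch.randn(2, 64, 32, 32)

# k_size=3 matches the module's default 1D-convolution kernel size.
eca = eca_layer(channel=64, k_size=3)

# The module global-average-pools the input, runs a 1D convolution across
# the channel dimension, applies a sigmoid gate, and rescales the input
# channel-wise, so the output shape equals the input shape.
y = eca(x)
print(y.shape)  # torch.Size([2, 64, 32, 32])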