From e06607e083d9e8cc22be3c724d733fc6e43c0903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=96=87=E6=B4=8B?= <2172363072@qq.com> Date: Thu, 14 Jul 2022 08:22:58 +0000 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20PyTorch/?= =?UTF-8?q?contrib/cv/classification/TResNet/perf.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除文件 PyTorch/contrib/cv/classification/TResNet/demo.py 删除文件 PyTorch/contrib/cv/classification/TResNet/train.py 删除文件 PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py 删除文件 PyTorch/contrib/cv/classification/TResNet/validate.py torch1.8版本 torch1.8版本 update PyTorch/contrib/cv/classification/TResNet/perf.py. update PyTorch/contrib/cv/classification/TResNet/demo.py. 删除文件 PyTorch/contrib/cv/classification/TResNet/perf_prof.py 删除文件 PyTorch/contrib/cv/classification/TResNet/test/1p_perf_prof.sh update PyTorch/contrib/cv/classification/TResNet/perf.py. update PyTorch/contrib/cv/classification/TResNet/perf.py. update PyTorch/contrib/cv/classification/TResNet/train.py. update PyTorch/contrib/cv/classification/TResNet/validate.py. update PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py. 删除文件 PyTorch/contrib/cv/classification/TResNet/README.md 更新readme Signed-off-by: 王文洋 <2172363072@qq.com> 删除文件 PyTorch/contrib/cv/classification/TResNet/README.md 更新readme Signed-off-by: 王文洋 <2172363072@qq.com> 更新 Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/train.py. Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/perf.py. Signed-off-by: 王文洋 <2172363072@qq.com> 删除文件 PyTorch/contrib/cv/classification/TResNet/README_raw.md 删除文件 PyTorch/contrib/cv/classification/TResNet/README.md 更新readme Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/requirements.txt. Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/perf.py. Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py. Signed-off-by: 王文洋 <2172363072@qq.com> update PyTorch/contrib/cv/classification/TResNet/perf.py. Signed-off-by: 王文洋 <2172363072@qq.com> 删除文件 PyTorch/contrib/cv/classification/TResNet/requirements.txt 更新 Signed-off-by: 王文洋 <2172363072@qq.com> --- .../cv/classification/TResNet/README.md | 192 ++++++++++++++---- .../contrib/cv/classification/TResNet/demo.py | 2 + .../contrib/cv/classification/TResNet/perf.py | 14 +- .../classification/TResNet/requirements.txt | 11 +- .../cv/classification/TResNet/train.py | 11 +- .../TResNet/train_finetune_1p.py | 10 +- .../cv/classification/TResNet/validate.py | 3 +- 7 files changed, 189 insertions(+), 54 deletions(-) diff --git a/PyTorch/contrib/cv/classification/TResNet/README.md b/PyTorch/contrib/cv/classification/TResNet/README.md index 1a981a0999..39b7eb7a07 100644 --- a/PyTorch/contrib/cv/classification/TResNet/README.md +++ b/PyTorch/contrib/cv/classification/TResNet/README.md @@ -1,59 +1,175 @@ -# TResNet +# TResNet for PyTorch -This implements training of TResNet on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/rwightman/pytorch-image-models/tree/v0.4.5). -## TResNet Detail +- [概述](#概述) +- [准备训练环境](#准备训练环境) +- [开始训练](#开始训练) +- [训练结果展示](#训练结果展示) +- [版本说明](#版本说明) -First, Ascend-Pytorch has not implement a third-party inplace-abn, so we have to use BatchNorm2D to replace it. -Second, for op PadV3D,reflect mode is not supported currently,so we replace it with Conv2D's padding. -And then, as of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, TResNet is using whilte shape list to solve the problem about TransposeD. +# 概述 -## Requirements +## 简述 -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- Download the ImageNet dataset - - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) +TResNet的设计基于ResNet50架构,并进行专门的修改和优化,旨在不失训练和推理速度的前提下,提升网络的精度,其优化的方面为SpaceToDepth stem、抗锯齿降采样、原地激活批归一化、Block类型选择方法、优化的SE层。 TResNet 在 Top-1 准确度上超越了 ResNet50。 -## Before Traing +- 参考实现: -Please add these shape into white shape list before training:[ 190,3, 224, 224],[ 190, 3, 56, 4, 56, 4],[ 190, 4, 4, 3, 56, 56], + ``` + url=https://github.com/rwightman/pytorch-image-models/tree/v0.4.5 + commit_id=5b28ef410062af2a2ab1f27bf02cf33e2ba28ca2 + ``` -## Training +- 适配昇腾 AI 处理器的实现: -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + ``` + url=https://gitee.com/ascend/ModelZoo-PyTorch.git + code_path=PyTorch/contrib/cv/classification + ``` -```bash -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=${real_data_path} +- 通过Git获取代码方法如下: -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=${real_data_path} + ``` + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=${real_data_path} +- 通过单击“立即下载”,下载源码包。 -#test 1p accuracy -bash test/train_eval_1p.sh --data_path=${real_data_path}/val --pth_path=${real_pre_train_model_path} +# 准备训练环境 -# finetuning 1p -bash test/train_finetune_1p.sh --data_path=${real_data_path} --pth_path=${real_pre_train_model_path} --num_classes=1001 +## 准备环境 -# demo -python demo.py ${real_pre_train_model_path} -``` +- 当前模型支持的固件与驱动、 CANN 以及 PyTorch 如下表所示。 -Log path: - log/train_log_8p # training detail log - log/perf_8p.log # 8p training performance result log - log/eval.log # 8p training accuracy result log + 表 1 版本配套表 + | 配套 | 版本 | + |----------|---------- | + | 固件与驱动 | [5.1.RC2](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) | + | CANN | [5.1.RC2](https://www.hiascend.com/software/cann/commercial?version=5.1.RC2) | + | PyTorch | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) | +- 环境准备指导。 -## TResNet training result + 请参考《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》。 -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 927 | 1 | 1 | O2 | -| 78.87 | 6200 | 8 | 110 | O2 | \ No newline at end of file +- 安装依赖。 + + ``` + pip install -r requirements.txt + ``` + +## 准备数据集 + +1. 获取数据集。 + + 用户自行获取原始数据集,可选用的开源数据集包括ImageNet2012,将数据集上传到服务器任意路径下并解压。 + + 以ImageNet2012数据集为例,数据集目录结构参考如下所示。 + + ``` + ├── ImageNet2012 + ├──train + ├──类别1 + │──图片1 + │──图片2 + │ ... + ├──类别2 + │──图片1 + │──图片2 + │ ... + ├──... + ├──val + ├──类别1 + │──图片1 + │──图片2 + │ ... + ├──类别2 + │──图片1 + │──图片2 + │ ... + ``` + + > **说明:** + >该数据集的训练过程脚本只作为一种参考示例。 + +# 开始训练 + +## 训练模型 + +1. 进入解压后的源码包根目录。 + + ``` + cd /${模型文件夹名称} + ``` + +2. 运行训练脚本。 + + 该模型支持单机单卡训练和单机8卡训练。 + + - 单机单卡训练 + + 启动单卡训练。 + + ``` + bash ./test/train_full_1p.sh --data_path=/data/xxx/ + ``` + + - 单机8卡训练 + + 启动8卡训练。 + + ``` + bash ./test/train_full_8p.sh --data_path=/data/xxx/ + ``` + + --data_path参数填写数据集路径。 + + 模型训练脚本参数说明如下。 + + ``` + 公共参数: + --model //使用模型 + --workers //加载数据进程数 + --lr //初始学习率 + --epochs //重复训练次数 + --batch-size //训练批次大小 + --world-size //分布式训练节点数量 + --label-smoothing //标签平滑 + --print-freq //打印周期 + --device //使用npu还是gpu + --amp //是否使用混合精度 + --weight_decay //权重衰减 + --loss-scale //混合精度lossscale大小 + --opt-level //混合精度类型 + ``` + + 训练完成后,权重文件保存在当前路径下,并输出模型训练精度和性能信息。 + + +# 训练结果展示 + +**表 2** 训练结果展示表 + +| NAME | Acc@1 | FPS | Epochs | AMP_Type | Torch_version | +| ------- |----- | ---: | ------ | ------- | -------:| +| 1p-竞品 | - |751.26 | 1 | - | - | +| 8p-竞品 | 78.88 |5136.17 | 110 | - | - | +| 1p-NPU | - |927 | 1 | O2 | 1.5 | +| 1p-NPU | - |721.21| 1 | O2 | 1.8 | +| 8p-NPU | 78.87 |6200 | 110 | O2 | 1.5 | +| 8p-NPU |78.7219|5517.23 | 110 | O2 | 1.8 | + +# 版本说明 + +## 变更 + +2022.07.14:更新pytorch1.8版本,重新发布。 + +2021.09.10:首次发布。 + + +## 已知问题 +无。 diff --git a/PyTorch/contrib/cv/classification/TResNet/demo.py b/PyTorch/contrib/cv/classification/TResNet/demo.py index ac8e8a949d..bf1f779773 100644 --- a/PyTorch/contrib/cv/classification/TResNet/demo.py +++ b/PyTorch/contrib/cv/classification/TResNet/demo.py @@ -15,6 +15,8 @@ import argparse import torch import torchvision +if torch.__version__>= '1.8': + import torch_npu from torchvision import datasets, transforms from collections import OrderedDict import sys diff --git a/PyTorch/contrib/cv/classification/TResNet/perf.py b/PyTorch/contrib/cv/classification/TResNet/perf.py index fb0e92ab41..033ae97bc2 100644 --- a/PyTorch/contrib/cv/classification/TResNet/perf.py +++ b/PyTorch/contrib/cv/classification/TResNet/perf.py @@ -28,7 +28,6 @@ NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) """ -import torch.npu import argparse import time import yaml @@ -39,6 +38,9 @@ from contextlib import suppress from datetime import datetime import torch +if torch.__version__>= '1.8': + import torch_npu +import torch.npu import torch.nn as nn import torchvision.utils from torch.nn.parallel import DistributedDataParallel as NativeDDP @@ -62,7 +64,7 @@ except ImportError: has_native_amp = False try: if getattr(torch.npu.amp, 'autocast') is not None: - has_native_amp = True + has_native_amp = False except AttributeError: pass @@ -286,7 +288,8 @@ parser.add_argument('--torchscript', dest='torchscript', action='store_true', help='convert model torchscript for inference') parser.add_argument('--world-size', default=1,type=int, help='world size of group') - +parser.add_argument('--loss-scaler', default='dynamic', + help='loss scale using in amp') def _parse_args(): # Do we have a config file to parse? args_config, remaining = config_parser.parse_known_args() @@ -412,8 +415,9 @@ def main(): amp_autocast = suppress # do nothing loss_scaler = None if use_amp == 'apex': - model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=128.0,combine_grad=True) + model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=args.loss_scaler,combine_grad=True) loss_scaler = ApexScaler() + if args.local_rank == 0: _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') elif use_amp == 'native': @@ -789,5 +793,5 @@ def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix='') if __name__ == '__main__': - + main() diff --git a/PyTorch/contrib/cv/classification/TResNet/requirements.txt b/PyTorch/contrib/cv/classification/TResNet/requirements.txt index c05fc30279..bc9275cb77 100644 --- a/PyTorch/contrib/cv/classification/TResNet/requirements.txt +++ b/PyTorch/contrib/cv/classification/TResNet/requirements.txt @@ -1,4 +1,7 @@ -numpy -torch -opencv-python -Pillow \ No newline at end of file +numpy +opencv-python +Pillow==9.1.0 +pyyaml +torchvision==0.9.1 +decorator +sympy \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/TResNet/train.py b/PyTorch/contrib/cv/classification/TResNet/train.py index 6a31da8b28..03d45cbf5e 100644 --- a/PyTorch/contrib/cv/classification/TResNet/train.py +++ b/PyTorch/contrib/cv/classification/TResNet/train.py @@ -28,6 +28,9 @@ NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) """ +import torch +if torch.__version__>= '1.8': + import torch_npu import torch.npu import argparse import time @@ -61,7 +64,7 @@ except ImportError: has_native_amp = False try: if getattr(torch.npu.amp, 'autocast') is not None: - has_native_amp = True + has_native_amp = False except AttributeError: pass @@ -282,6 +285,8 @@ parser.add_argument('--torchscript', dest='torchscript', action='store_true', help='convert model torchscript for inference') parser.add_argument('--world-size', default=1,type=int, help='world size of group') +parser.add_argument('--loss-scaler', default='dynamic', + help='loss scale using in amp') def _parse_args(): # Do we have a config file to parse? @@ -408,7 +413,7 @@ def main(): amp_autocast = suppress # do nothing loss_scaler = None if use_amp == 'apex': - model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=128.0,combine_grad=True) + model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=args.loss_scaler,combine_grad=True) loss_scaler = ApexScaler() if args.local_rank == 0: _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') @@ -814,5 +819,5 @@ def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix='') if __name__ == '__main__': - + main() diff --git a/PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py b/PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py index e93edda721..e9e57012c6 100644 --- a/PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py +++ b/PyTorch/contrib/cv/classification/TResNet/train_finetune_1p.py @@ -28,7 +28,6 @@ NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) """ -import torch.npu import argparse import time import yaml @@ -40,6 +39,9 @@ from datetime import datetime import torch.optim as optim import torch +if torch.__version__>= '1.8': + import torch_npu +import torch.npu import torch.nn as nn import torchvision.utils from torch.nn.parallel import DistributedDataParallel as NativeDDP @@ -62,7 +64,7 @@ except ImportError: has_native_amp = False try: if getattr(torch.npu.amp, 'autocast') is not None: - has_native_amp = True + has_native_amp = False except AttributeError: pass @@ -285,6 +287,8 @@ parser.add_argument('--torchscript', dest='torchscript', action='store_true', help='convert model torchscript for inference') parser.add_argument('--world-size', default=1,type=int, help='world size of group') +parser.add_argument('--loss-scaler', default='dynamic', + help='loss scale using in amp') def _parse_args(): # Do we have a config file to parse? @@ -433,7 +437,7 @@ def main(): amp_autocast = suppress # do nothing loss_scaler = None if use_amp == 'apex': - model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=128.0,combine_grad=True) + model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=args.loss_scaler,combine_grad=True) loss_scaler = ApexScaler() if args.local_rank == 0: _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') diff --git a/PyTorch/contrib/cv/classification/TResNet/validate.py b/PyTorch/contrib/cv/classification/TResNet/validate.py index eb692161c8..cecd25bc40 100644 --- a/PyTorch/contrib/cv/classification/TResNet/validate.py +++ b/PyTorch/contrib/cv/classification/TResNet/validate.py @@ -32,6 +32,8 @@ import torch.nn as nn import torch.nn.parallel from collections import OrderedDict from contextlib import suppress +if torch.__version__>= '1.8': + import torch_npu from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models from timm.data import create_dataset, create_loader, resolve_data_config, RealLabelsImagenet @@ -123,7 +125,6 @@ parser.add_argument('--real-labels', default='', type=str, metavar='FILENAME', parser.add_argument('--valid-labels', default='', type=str, metavar='FILENAME', help='Valid label indices txt file for validation of partial label space') - def validate(args): # might as well try to validate something args.pretrained = args.pretrained or not args.checkpoint -- Gitee