From 7b0ca58f6165744f5d3b2ab0e266a94697907bcd Mon Sep 17 00:00:00 2001
From: yxl0321 <2565963290@qq.com>
Date: Tue, 23 Aug 2022 18:50:22 +0800
Subject: [PATCH 1/7] Revise commit conventions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 PyTorch/contrib/cv/others/VDSR/LICENSE        |  23 +
 PyTorch/contrib/cv/others/VDSR/README.md      | 178 ++++++
 .../cv/others/VDSR/data/generate_test_mat.m   |  28 +
 .../cv/others/VDSR/data/generate_train.m      |  93 +++
 PyTorch/contrib/cv/others/VDSR/data/modcrop.m |  12 +
 .../contrib/cv/others/VDSR/data/store2hdf5.m  |  59 ++
 PyTorch/contrib/cv/others/VDSR/dataset.py     |  16 +
 PyTorch/contrib/cv/others/VDSR/main.py        | 572 ++++++++++++++++++
 PyTorch/contrib/cv/others/VDSR/models/vdsr.py |  40 ++
 .../contrib/cv/others/VDSR/modelzoo_level.txt |   3 +
 .../contrib/cv/others/VDSR/requirements.txt   |   5 +
 .../contrib/cv/others/VDSR/test/env_npu.sh    |  76 +++
 .../cv/others/VDSR/test/train_eval_8p.sh      | 154 +++++
 .../cv/others/VDSR/test/train_finetune_1p.sh  | 156 +++++
 .../cv/others/VDSR/test/train_full_1p.sh      | 145 +++++
 .../cv/others/VDSR/test/train_full_8p.sh      | 160 +++++
 .../others/VDSR/test/train_performance_1p.sh  | 145 +++++
 .../others/VDSR/test/train_performance_8p.sh  | 160 +++++
 18 files changed, 2025 insertions(+)
 create mode 100644 PyTorch/contrib/cv/others/VDSR/LICENSE
 create mode 100644 PyTorch/contrib/cv/others/VDSR/README.md
 create mode 100644 PyTorch/contrib/cv/others/VDSR/data/generate_test_mat.m
 create mode 100644 PyTorch/contrib/cv/others/VDSR/data/generate_train.m
 create mode 100644 PyTorch/contrib/cv/others/VDSR/data/modcrop.m
 create mode 100644 PyTorch/contrib/cv/others/VDSR/data/store2hdf5.m
 create mode 100644 PyTorch/contrib/cv/others/VDSR/dataset.py
 create mode 100644 PyTorch/contrib/cv/others/VDSR/main.py
 create mode 100644 PyTorch/contrib/cv/others/VDSR/models/vdsr.py
 create mode 100644 PyTorch/contrib/cv/others/VDSR/modelzoo_level.txt
 create mode 100644 PyTorch/contrib/cv/others/VDSR/requirements.txt
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/env_npu.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh
 create mode 100644 PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh

diff --git a/PyTorch/contrib/cv/others/VDSR/LICENSE b/PyTorch/contrib/cv/others/VDSR/LICENSE
new file mode 100644
index 0000000000..04f3814c94
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/LICENSE
@@ -0,0 +1,23 @@
The MIT License (MIT)

Copyright (c) 2017- Jiu XU
Copyright (c) 2017- Rakuten, Inc
Copyright (c) 2017- Rakuten Institute of Technology

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/README.md b/PyTorch/contrib/cv/others/VDSR/README.md
new file mode 100644
index 0000000000..5e1a04a96e
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/README.md
@@ -0,0 +1,178 @@
# VDSR for PyTorch

- [Overview](Overview.md)
- [Preparing the Training Environment](Preparing-the-Training-Environment.md)
- [Training](Training.md)
- [Training Results](Training-Results.md)
- [Release Notes](Release-Notes.md)



# Overview

## Summary

VDSR is a classic super-resolution model that uses a very deep network for the task. It combines residual learning with a very high learning rate to accelerate training, and relies on adjustable gradient clipping to keep training stable. As with SRCNN, the low-resolution input is first upscaled to the target resolution by bicubic interpolation and the network then predicts on the upscaled image. The model has two parts: a VGG-like deep network in which every layer is a padded 3x3 convolution followed by a ReLU for non-linearity, and a final residual connection that adds the network's prediction element-wise to the interpolated input to produce the result.


- Reference implementation:

  ```
  url=https://github.com/twtygqyy/pytorch-vdsr.git
  ```

- Implementation adapted for Ascend AI processors:

  ```
  url=https://gitee.com/ascend/ModelZoo-PyTorch.git
  code_path=PyTorch/contrib/cv/others
  ```

- Get the code via Git:

  ```
  git clone https://gitee.com/yxl0321/ModelZoo-PyTorch.git    # clone the repository
  cd ./ModelZoo-PyTorch/PyTorch/contrib/cv/others/VDSR        # enter the model directory; not needed if the repository contains only this model
  ```

- Or click "Download Now" to download the source package.

# Preparing the Training Environment

## Environment

- The firmware/driver, CANN, and PyTorch versions supported by this model are listed in the table below.

  **Table 1** Version compatibility

  | Component         | Version                                                       |
  | ----------------- | ------------------------------------------------------------- |
  | Firmware & driver | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
  | CANN              | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) |
  | PyTorch           | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/)        |

- Environment setup guide.

  See [PyTorch Framework Training Environment Preparation](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes).

- Install the dependencies (add packages as the model requires):

  ```
  pip install -r requirements.txt
  ```


## Preparing the Dataset

1. Training set.

   The training set is generated from the T91 dataset with MATLAB bicubic interpolation. Run ./data/generate_train.m (after setting "folder=real_image_path") to build your own training file, or use the pre-generated one; on the 192.168.99.101 host it is at './forDocker/home/yxl/VDSR/data/train.h5'.
2. Test set.

   The test set is Set5, generated from 5 high-resolution images by bicubic interpolation. It should be placed under the ./VDSR directory; on the 192.168.99.101 host it is at './forDocker/home/yxl/VDSR/Set5_mat'.



## Pretrained Models

Pretrained models are stored under the './checkpoint' directory; a checkpoint is saved every 5 epochs during training.

# Training

## Training the Model

1. Enter the root directory of the unpacked source package.

   ```
   cd ./VDSR
   ```

2. Run the training script.

   The model supports single-machine single-card training and single-machine 8-card training.

   - Single-card training

     Start 1p training.

     ```
     # training 1p accuracy
     bash ./test/train_full_1p.sh --data_path=real_data_path

     # training 1p performance
     bash ./test/train_performance_1p.sh --data_path=real_data_path

     # finetuning 1p
     bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path

     ```

   - 8-card training

     Start 8p training.

     ```
     # training 8p accuracy
     bash ./test/train_full_8p.sh --data_path=real_data_path

     # training 8p performance
     bash ./test/train_performance_8p.sh --data_path=real_data_path

     # test 8p accuracy
     bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
     ```

   --data\_path takes the dataset path; default: ./data/train.h5
   --pth\_path takes the pretrained-model path; default: ./checkpoint/model_epoch_50.pth

   The training-script parameters are described below.

   ```
   Common parameters:
   --data_path                        // dataset path
   --addr                             // master host address
   --nEpochs                          // number of training epochs
   --batchSize                        // training batch size
   --lr                               // initial learning rate; default: 0.1
   --momentum                         // momentum; default: 0.9
   --weight-decay                     // weight decay; default: 1e-4
   --resume                           // checkpoint path for restarting an interrupted run
   --start-epoch                      // first epoch to train; default: 1
   --clip                             // gradient-clipping threshold; default: 0.4
   --pretrained                       // pretrained-model path
   --valdata                          // test-set path
   --amp                              // enable mixed-precision training
   --loss-scale                       // amp loss scale
   --opt-level                        // amp opt level
   Multi-card parameters:
   --multiprocessing-distributed      // enable multi-card training
   --device-list '0,1,2,3,4,5,6,7'    // cards to use for multi-card training
   ```

   After training completes, the weight files are saved under './checkpoint' and the model's training accuracy and performance information is printed.
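   The test/ scripts are thin wrappers around main.py. For reference only, a minimal single-card invocation with the flags spelled out might look like the sketch below; the paths and batch size are placeholders, the flag names follow the argparse block in main.py, and the real scripts additionally pin CPU affinity and write logs under test/output/.

   ```
   # minimal 1p sketch; the test/*.sh scripts remain the supported entry points
   source ./test/env_npu.sh
   export RANK_SIZE=1    # main.py reads RANK_SIZE when --device='npu'
   python3.7 ./main.py \
       --data_path=./data/train.h5 \
       --valdata=./Set5_mat \
       --device='npu' \
       --gpu=0 \
       --world_size=1 \
       --nEpochs=50 \
       --batchSize=128 \
       --lr=0.1 \
       --amp
   ```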
# Training Results

**Table 2** Training results

For each scale we report the model's PSNR (PSNR_predicted) alongside the PSNR of plain bicubic interpolation (PSNR_bicubic) for comparison. The results show that this simple model is effective.

| NAME          | PSNR_x2 | PSNR_x3 | PSNR_x4 |   FPS | Epochs | AMP_Type |
| ------------- | ------- | ------- | ------- | ----: | ------ | -------: |
| 1p-competitor | 37.07   | 33.35   | 31.10   |  2434 | 50     |        - |
| 1p-NPU        | 37.30   | 33.53   | 31.25   |  1435 | 50     |       O1 |
| 8p-competitor | 37.03   | 33.34   | 31.04   | 10934 | 50     |        - |
| 8p-NPU        | 37.17   | 33.42   | 31.10   |  4385 | 50     |       O1 |

diff --git a/PyTorch/contrib/cv/others/VDSR/data/generate_test_mat.m b/PyTorch/contrib/cv/others/VDSR/data/generate_test_mat.m
new file mode 100644
index 0000000000..d8ab163a86
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/data/generate_test_mat.m
@@ -0,0 +1,28 @@
clear;close all;
%% settings
folder = 'Set5';

%% generate data
filepaths = [];
filepaths = [filepaths; dir(fullfile(folder, '*.bmp'))];

scale = [2, 3, 4];

for i = 1 : length(filepaths)
    im_gt = imread(fullfile(folder,filepaths(i).name));
    for s = 1 : length(scale)
        im_gt = modcrop(im_gt, scale(s));
        im_gt = double(im_gt);
        im_gt_ycbcr = rgb2ycbcr(im_gt / 255.0);
        im_gt_y = im_gt_ycbcr(:,:,1) * 255.0;
        im_l_ycbcr = imresize(im_gt_ycbcr,1/scale(s),'bicubic');
        im_b_ycbcr = imresize(im_l_ycbcr,scale(s),'bicubic');
        im_l_y = im_l_ycbcr(:,:,1) * 255.0;
        im_l = ycbcr2rgb(im_l_ycbcr) * 255.0;
        im_b_y = im_b_ycbcr(:,:,1) * 255.0;
        im_b = ycbcr2rgb(im_b_ycbcr) * 255.0;
        last = length(filepaths(i).name)-4;
        filename = sprintf('Set5_mat/%s_x%s.mat',filepaths(i).name(1 : last),num2str(scale(s)));
        save(filename, 'im_gt_y', 'im_b_y', 'im_gt', 'im_b', 'im_l_ycbcr', 'im_l_y', 'im_l');
    end
end
diff --git a/PyTorch/contrib/cv/others/VDSR/data/generate_train.m b/PyTorch/contrib/cv/others/VDSR/data/generate_train.m
new file mode 100644
index 0000000000..8caba08852
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/data/generate_train.m
@@ -0,0 +1,93 @@
clear;close all;

folder = 'path/to/train/folder';

savepath = 'train.h5';
size_input = 41;
size_label = 41;
stride = 41;

%% scale factors
scale = [2,3,4];
%% downsizing
downsizes = [1,0.7,0.5];

%% initialization
data = zeros(size_input, 
size_input, 1, 1); +label = zeros(size_label, size_label, 1, 1); + +count = 0; +margain = 0; + +%% generate data +filepaths = []; +filepaths = [filepaths; dir(fullfile(folder, '*.jpg'))]; +filepaths = [filepaths; dir(fullfile(folder, '*.bmp'))]; + +for i = 1 : length(filepaths) + for flip = 1: 3 + for degree = 1 : 4 + for s = 1 : length(scale) + for downsize = 1 : length(downsizes) + image = imread(fullfile(folder,filepaths(i).name)); + + if flip == 1 + image = flipdim(image ,1); + end + if flip == 2 + image = flipdim(image ,2); + end + + image = imrotate(image, 90 * (degree - 1)); + + image = imresize(image,downsizes(downsize),'bicubic'); + + if size(image,3)==3 + image = rgb2ycbcr(image); + image = im2double(image(:, :, 1)); + + im_label = modcrop(image, scale(s)); + [hei,wid] = size(im_label); + im_input = imresize(imresize(im_label,1/scale(s),'bicubic'),[hei,wid],'bicubic'); + filepaths(i).name + for x = 1 : stride : hei-size_input+1 + for y = 1 :stride : wid-size_input+1 + + subim_input = im_input(x : x+size_input-1, y : y+size_input-1); + subim_label = im_label(x : x+size_label-1, y : y+size_label-1); + + count=count+1; + + data(:, :, 1, count) = subim_input; + label(:, :, 1, count) = subim_label; + end + end + end + end + end + end + end +end + +order = randperm(count); +data = data(:, :, 1, order); +label = label(:, :, 1, order); + +%% writing to HDF5 +chunksz = 64; +created_flag = false; +totalct = 0; + +for batchno = 1:floor(count/chunksz) + batchno + last_read=(batchno-1)*chunksz; + batchdata = data(:,:,1,last_read+1:last_read+chunksz); + batchlabs = label(:,:,1,last_read+1:last_read+chunksz); + + startloc = struct('dat',[1,1,1,totalct+1], 'lab', [1,1,1,totalct+1]); + curr_dat_sz = store2hdf5(savepath, batchdata, batchlabs, ~created_flag, startloc, chunksz); + created_flag = true; + totalct = curr_dat_sz(end); +end + +h5disp(savepath); diff --git a/PyTorch/contrib/cv/others/VDSR/data/modcrop.m b/PyTorch/contrib/cv/others/VDSR/data/modcrop.m new file mode 100644 index 0000000000..728c688106 --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/data/modcrop.m @@ -0,0 +1,12 @@ +function imgs = modcrop(imgs, modulo) +if size(imgs,3)==1 + sz = size(imgs); + sz = sz - mod(sz, modulo); + imgs = imgs(1:sz(1), 1:sz(2)); +else + tmpsz = size(imgs); + sz = tmpsz(1:2); + sz = sz - mod(sz, modulo); + imgs = imgs(1:sz(1), 1:sz(2),:); +end + diff --git a/PyTorch/contrib/cv/others/VDSR/data/store2hdf5.m b/PyTorch/contrib/cv/others/VDSR/data/store2hdf5.m new file mode 100644 index 0000000000..0a0016dca4 --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/data/store2hdf5.m @@ -0,0 +1,59 @@ +function [curr_dat_sz, curr_lab_sz] = store2hdf5(filename, data, labels, create, startloc, chunksz) + % *data* is W*H*C*N matrix of images should be normalized (e.g. to lie between 0 and 1) beforehand + % *label* is D*N matrix of labels (D labels per sample) + % *create* [0/1] specifies whether to create file newly or to append to previously created file, useful to store information in batches when a dataset is too big to be held in memory (default: 1) + % *startloc* (point at which to start writing data). 
By default, + % if create=1 (create mode), startloc.data=[1 1 1 1], and startloc.lab=[1 1]; + % if create=0 (append mode), startloc.data=[1 1 1 K+1], and startloc.lab = [1 K+1]; where K is the current number of samples stored in the HDF + % chunksz (used only in create mode), specifies number of samples to be stored per chunk (see HDF5 documentation on chunking) for creating HDF5 files with unbounded maximum size - TLDR; higher chunk sizes allow faster read-write operations + + % verify that format is right + dat_dims=size(data); + lab_dims=size(labels); + num_samples=dat_dims(end); + + assert(lab_dims(end)==num_samples, 'Number of samples should be matched between data and labels'); + + if ~exist('create','var') + create=true; + end + + + if create + %fprintf('Creating dataset with %d samples\n', num_samples); + if ~exist('chunksz', 'var') + chunksz=1000; + end + if exist(filename, 'file') + fprintf('Warning: replacing existing file %s \n', filename); + delete(filename); + end + h5create(filename, '/data', [dat_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [dat_dims(1:end-1) chunksz]); % width, height, channels, number + h5create(filename, '/label', [lab_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [lab_dims(1:end-1) chunksz]); % width, height, channels, number + if ~exist('startloc','var') + startloc.dat=[ones(1,length(dat_dims)-1), 1]; + startloc.lab=[ones(1,length(lab_dims)-1), 1]; + end + else % append mode + if ~exist('startloc','var') + info=h5info(filename); + prev_dat_sz=info.Datasets(1).Dataspace.Size; + prev_lab_sz=info.Datasets(2).Dataspace.Size; + assert(prev_dat_sz(1:end-1)==dat_dims(1:end-1), 'Data dimensions must match existing dimensions in dataset'); + assert(prev_lab_sz(1:end-1)==lab_dims(1:end-1), 'Label dimensions must match existing dimensions in dataset'); + startloc.dat=[ones(1,length(dat_dims)-1), prev_dat_sz(end)+1]; + startloc.lab=[ones(1,length(lab_dims)-1), prev_lab_sz(end)+1]; + end + end + + if ~isempty(data) + h5write(filename, '/data', single(data), startloc.dat, size(data)); + h5write(filename, '/label', single(labels), startloc.lab, size(labels)); + end + + if nargout + info=h5info(filename); + curr_dat_sz=info.Datasets(1).Dataspace.Size; + curr_lab_sz=info.Datasets(2).Dataspace.Size; + end +end diff --git a/PyTorch/contrib/cv/others/VDSR/dataset.py b/PyTorch/contrib/cv/others/VDSR/dataset.py new file mode 100644 index 0000000000..125e12552f --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/dataset.py @@ -0,0 +1,16 @@ +import torch.utils.data as data +import torch +import h5py + +class DatasetFromHdf5(data.Dataset): + def __init__(self, file_path): + super(DatasetFromHdf5, self).__init__() + hf = h5py.File(file_path) + self.data = hf.get('data') + self.target = hf.get('label') + + def __getitem__(self, index): + return torch.from_numpy(self.data[index,:,:,:]).float(), torch.from_numpy(self.target[index,:,:,:]).float() + + def __len__(self): + return self.data.shape[0] \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/VDSR/main.py b/PyTorch/contrib/cv/others/VDSR/main.py new file mode 100644 index 0000000000..8ce6a894f9 --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/main.py @@ -0,0 +1,572 @@ +import argparse +import os +import warnings +import torch +if torch.__version__>= '1.8': + import torch_npu +import sys +import time +import random +import math +import glob +import scipy.io as sio +import numpy as np +import apex +from apex import amp +import torch.nn as nn +import torch.optim as optim +import 
torch.backends.cudnn as cudnn
import torch.multiprocessing as mp
import torch.utils.data.distributed
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.distributed as dist
from models.vdsr import Net
from dataset import DatasetFromHdf5
from apex.parallel import DistributedDataParallel


# Training settings
parser = argparse.ArgumentParser(description="PyTorch VDSR")
parser.add_argument('--data_path', default='./data/train.h5', type=str, help='path to dataset')
parser.add_argument("--batchSize", type=int, default=384, help="Training batch size")
parser.add_argument("--nEpochs", type=int, default=50, help="Number of epochs to train for")
parser.add_argument("--lr", type=float, default=0.1, help="Learning Rate. Default=0.1")
parser.add_argument("--step", type=int, default=10, help="Sets the learning rate to the initial LR decayed by 10 every n epochs, Default: n=10")
parser.add_argument("--resume", default="", type=str, help="Path to checkpoint (default: none)")
parser.add_argument("--start-epoch", default=1, type=int, help="Manual epoch number (useful on restarts)")
parser.add_argument("--clip", type=float, default=0.4, help="Clipping Gradients. Default=0.4")
parser.add_argument("--momentum", default=0.9, type=float, help="Momentum, Default: 0.9")
parser.add_argument("--weight-decay", "--wd", default=1e-4, type=float, help="Weight decay, Default: 1e-4")
parser.add_argument('--seed', default=49, type=int, help='seed for initializing training.')
parser.add_argument('--pretrained', default='', type=str, help='path to pretrained model (default: none)')
parser.add_argument("--gpu", default=None, type=int, help="device id to use (default: None)")
parser.add_argument('--num_classes', default=10, type=int, help='unused by VDSR; kept from the classification template')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)')
parser.add_argument('--valdata', default='./Set5_mat', type=str, help='path to the test-set .mat files (default: ./Set5_mat)')
# device / distributed settings
parser.add_argument('--device', default='npu', type=str, help='npu or gpu')
parser.add_argument("--world_size", default=1, type=int, help="Number of nodes for distributed training, Default: 1")
parser.add_argument('--local_rank', default=-1, type=int, help='node rank for distributed training')
parser.add_argument('--loss-scale', default=None, type=float, help='loss scale used in amp; default None means dynamic')
parser.add_argument('--opt-level', default='O1', type=str, help='opt level used in amp, default O1')
parser.add_argument('--prof', default=False, action='store_true', help='use profiling to evaluate the performance of model')
parser.add_argument('--amp', default=False, action='store_true', help='use amp to train the model')
parser.add_argument('--distributed', default=False, help='')
parser.add_argument('--addr', default='127.0.0.1', type=str, help='master addr')
parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')
parser.add_argument('--dist-url', default='tcp://127.0.0.1:50001', type=str, help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='hccl', type=str, help='distributed backend')
parser.add_argument('--multiprocessing-distributed', default=False, action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--rank', default=0, type=int,help='node rank for distributed training') + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + +# for servers to immediately record the logs +def flush_print(func): + def new_print(*args, **kwargs): + func(*args, **kwargs) + sys.stdout.flush() + return new_print +print = flush_print(print) + +def main(): + opt = parser.parse_args() + print(opt.device_list) + + os.environ['MASTER_ADDR'] = opt.addr + os.environ['MASTER_PORT'] = '29777' + + if opt.seed is not None: + random.seed(opt.seed) + torch.manual_seed(opt.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if opt.gpu is not None: + warnings.warn('You have chosen a specific GPU. This will completely ' + 'disable data parallelism.') + + if opt.dist_url == "env://" and opt.world_size == -1: + opt.world_size = int(os.environ["WORLD_SIZE"]) + + opt.distributed = opt.world_size > 1 or opt.multiprocessing_distributed + + opt.process_device_map = device_id_to_process_device_map(opt.device_list) + + if opt.device == 'npu': + ngpus_per_node = int(os.environ["RANK_SIZE"]) + else: + if opt.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + if opt.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + opt.world_size = ngpus_per_node * opt.world_size + print('opt.world_size:',opt.world_size) + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + if opt.device=='npu': + main_worker(opt.gpu, ngpus_per_node,opt) + else: + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, opt)) + else: + # Simply call main_worker function + main_worker(opt.gpu, ngpus_per_node,opt) + +def main_worker(gpu, ngpus_per_node,opt): + + opt.gpu = opt.process_device_map[gpu] + + if opt.gpu is not None: + print("Use GPU: {} for training".format(opt.gpu)) + + if opt.distributed: + if opt.dist_url == "env://" and opt.rank == -1: + opt.rank = int(os.environ["RANK"]) + if opt.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + opt.rank = opt.rank * ngpus_per_node + gpu + + if opt.device == 'npu': + dist.init_process_group(backend=opt.dist_backend, # init_method=args.dist_url, + world_size=opt.world_size, rank=opt.rank) + else: + dist.init_process_group(backend=opt.dist_backend, init_method=opt.dist_url, + world_size=opt.world_size, rank=opt.rank) + + + print("===> Building model") + if opt.pretrained: + model = Net() + if os.path.isfile(opt.pretrained): + print("=> loading model '{}'".format(opt.pretrained)) + pretrained_dict = torch.load(opt.pretrained, map_location="cpu")["state_dict"] + else: + pretrained_dict = torch.load("./checkpoint/model_epoch_50.pth", map_location="cpu")["state_dict"] + if "fc.weight" in pretrained_dict: + 
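            # safety net kept from the classification template this script was
            # adapted from: VDSR's Net defines no fc layer (see models/vdsr.py),
            # so this branch is not expected to trigger for VDSR checkpoints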
print("pop fc layer weight") + pretrained_dict.pop('fc.weight') + pretrained_dict.pop('fc.bias') + model.load_state_dict(pretrained_dict, strict=False) + else: + model = Net() + + + if opt.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. + if opt.gpu is not None: + if opt.device == 'npu': + loc = 'npu:{}'.format(opt.gpu) + torch.npu.set_device(loc) + model = model.to(loc) + else: + torch.cuda.set_device(opt.gpu) + model.cuda(opt.gpu) + + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + opt.batchSize = int(opt.batchSize / opt.world_size) + # opt.workers = int((opt.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if opt.device == 'npu': + loc = 'npu:{}'.format(opt.gpu) + model = model.to(loc) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + print("[gpu id:", opt.gpu, "]", + "============================test opt.gpu is not None else==========================") + elif opt.gpu is not None: + print("[gpu id:", opt.gpu, "]", + "============================test elif opt.gpu is not None:==========================") + if opt.device == 'npu': + loc = 'npu:{}'.format(opt.gpu) + torch.npu.set_device(opt.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(opt.gpu) + model = model.cuda(opt.gpu) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", opt.gpu, "]", "============================test 1==========================") + print("[gpu id:", opt.gpu, "]", "============================test 3==========================") + if opt.device == 'npu': + loc = 'npu:{}'.format(opt.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + print("===> Setting Optimizer") + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay) + if opt.amp: + model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale, combine_grad=True) + + print("===> Setting DDP") + if opt.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
        if opt.gpu is not None:
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            if opt.pretrained:
                model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu], broadcast_buffers=False,
                                                                  find_unused_parameters=True)
            else:
                model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu], broadcast_buffers=False)
        else:
            print("[gpu id:", opt.gpu, "]",
                  "============================test opt.gpu is not None else==========================")
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif opt.gpu is not None:
        print("[gpu id:", opt.gpu, "]",
              "============================test elif opt.gpu is not None:==========================")
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        print("[gpu id:", opt.gpu, "]", "============================test 1==========================")
        print("[gpu id:", opt.gpu, "]", "============================test 3==========================")
        if opt.device == 'npu':
            loc = 'npu:{}'.format(opt.gpu)
            model = torch.nn.DataParallel(model).to(loc)
        else:
            model = torch.nn.DataParallel(model).cuda()


    print("===> Setting LOSS")
    if opt.device == 'npu':
        loc = 'npu:{}'.format(opt.gpu)
        criterion = nn.MSELoss(reduction='sum').to(loc)
    else:
        criterion = nn.MSELoss(reduction='sum').cuda(opt.gpu)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            if opt.gpu is None:
                checkpoint = torch.load(opt.resume)
            else:
                # Map model to be loaded to specified single gpu.
                if opt.device == 'npu':
                    loc = 'npu:{}'.format(opt.gpu)
                else:
                    loc = 'cuda:{}'.format(opt.gpu)
                checkpoint = torch.load(opt.resume, map_location=loc)
            opt.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if opt.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(opt.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))


    cudnn.benchmark = True

    print("===> Loading datasets")
    train_set = DatasetFromHdf5(opt.data_path)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_set)
    else:
        train_sampler = None
    # NOTE: opt.workers is deliberately not forwarded to the DataLoader here;
    # the HDF5-backed dataset keeps an open file handle, which is generally
    # not safe to read from forked worker processes.
    training_data_loader = DataLoader(train_set, batch_size=opt.batchSize,
                                      shuffle=(train_sampler is None),
                                      pin_memory=False,
                                      sampler=train_sampler,
                                      drop_last=True)

    if opt.prof:
        profiling(training_data_loader, model, criterion, optimizer, opt)
        return

    start_time = time.time()
    print("===> Training")
    for epoch in range(opt.start_epoch, opt.nEpochs+1):
        if opt.distributed:
            train_sampler.set_epoch(epoch)
        train(training_data_loader, optimizer, model, criterion, epoch, opt, ngpus_per_node)
        if not opt.multiprocessing_distributed or (opt.multiprocessing_distributed
                                                   and opt.rank % ngpus_per_node == 0):
            if (epoch) % 5 != 0:  # save a checkpoint every 5 epochs
                continue

            ############## npu modify begin #############
            if opt.amp:
                save_checkpoint({
                    'epoch': epoch,
                    'arch': 'VDSR',
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'amp': amp.state_dict(),
                }, epoch)
            else:
                save_checkpoint({
                    'epoch': epoch,
                    'arch': 'VDSR',
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, epoch)
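            # saving the amp state dict alongside the weights lets --resume
            # restore the loss-scaler state and continue mixed-precision
            # training exactly where it left off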
            ############## npu modify end #############

    if not opt.multiprocessing_distributed or (opt.multiprocessing_distributed
                                               and opt.rank % ngpus_per_node == 0):
        print("===> Testing")
        test(model, opt)

def adjust_learning_rate(optimizer, epoch, opt):
    """Sets the learning rate to the initial LR decayed by 10 every opt.step epochs (default: 10)"""
    lr = opt.lr * (0.1 ** (epoch // opt.step))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
    print("Epoch = {}, lr = {}".format(epoch, optimizer.param_groups[0]["lr"]))

def profiling(data_loader, model, criterion, optimizer, opt):
    # switch to train mode
    model.train()

    def update(model, images, target, optimizer):
        # zero the gradients before the backward pass so each profiled step
        # performs a real forward/backward/update cycle
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, target)
        if opt.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

    for step, (images, target) in enumerate(data_loader):
        if opt.device == 'npu':
            loc = 'npu:{}'.format(opt.gpu)
            images = images.to(loc, non_blocking=True).to(torch.float)
            target = target.to(torch.float).to(loc, non_blocking=True)
        else:
            images = images.cuda(opt.gpu, non_blocking=True)
            target = target.cuda(opt.gpu, non_blocking=True)

        if step < 5:
            update(model, images, target, optimizer)
        else:
            if opt.device == 'npu':
                with torch.autograd.profiler.profile(use_npu=True) as prof:
                    update(model, images, target, optimizer)
            else:
                with torch.autograd.profiler.profile(use_cuda=True) as prof:
                    update(model, images, target, optimizer)
            break

    prof.export_chrome_trace("output.prof")

def train(training_data_loader, optimizer, model, criterion, epoch, opt, ngpus_per_node):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    # top1/top5 are leftovers from the classification template; VDSR is a
    # regression task, so these meters are never updated
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(training_data_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    adjust_learning_rate(optimizer, epoch, opt)
    # param_groups: [{'params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'}, {...}]
    model.train()
    end = time.time()
    for iteration, batch in enumerate(training_data_loader, 1):
        input, target = Variable(batch[0]), Variable(batch[1], requires_grad=False)
        data_time.update(time.time() - end)

        if opt.device == 'npu':
            loc = 'npu:{}'.format(opt.gpu)
            input = input.to(loc, non_blocking=True).to(torch.float)
            target = target.to(loc, non_blocking=True).to(torch.float)
        else:
            input = input.cuda(opt.gpu, non_blocking=True)
            target = target.cuda(opt.gpu, non_blocking=True)

        output = model(input)
        loss = criterion(output, target)

        optimizer.zero_grad()
        if opt.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            # gradient clipping; with amp, clip the fp32 master gradients
            nn.utils.clip_grad_norm_(amp.master_params(optimizer), opt.clip)
        else:
            loss.backward()
            # gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), opt.clip)
        optimizer.step()
        if opt.device == 'npu':
            torch.npu.synchronize()
        # measure elapsed time
        cost_time = time.time() - end
        batch_time.update(cost_time)
        end = time.time()

        if not opt.multiprocessing_distributed or (opt.multiprocessing_distributed
                                                   and opt.rank % ngpus_per_node == 0):
            if iteration % 100 == 0:
                print("===> Epoch[{}]({}/{}): Loss: {:.10f}".format(epoch, iteration, len(training_data_loader), loss.item()))
            if 
batch_time.avg: + print("[npu id:", opt.gpu, "]", "batch_size:", opt.world_size * opt.batchSize, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + opt.batchSize * opt.world_size / batch_time.avg)) + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=10): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print("[npu id:", os.environ['LOCAL_DEVICE_ID'], "]", '\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + +def save_checkpoint(state, epoch): + model_out_path = "checkpoint/" + "model_epoch_{}.pth".format(epoch) + if not os.path.exists("checkpoint/"): + os.makedirs("checkpoint/") + torch.save(state, model_out_path) + print("Checkpoint saved to {}".format(model_out_path)) + +def PSNR(pred, gt, shave_border=0): + height, width = pred.shape[:2] + pred = pred[shave_border:height - shave_border, shave_border:width - shave_border] + gt = gt[shave_border:height - shave_border, shave_border:width - shave_border] + imdff = pred - gt + rmse = math.sqrt(np.mean(imdff ** 2)) + if rmse == 0: + return 100 + return 20 * math.log10(255.0 / rmse) + +def test(model, opt): + + model.eval() + + scales = [2, 3, 4] + + image_list = glob.glob(opt.valdata+"/*.*") + + for scale in scales: + avg_psnr_predicted = 0.0 + avg_psnr_bicubic = 0.0 + avg_elapsed_time = 0.0 + count = 0.0 + for image_name in image_list: + if str(scale) in image_name: + count += 1 + # print("Processing ", image_name) + im_gt_y = sio.loadmat(image_name)['im_gt_y'] + im_b_y = sio.loadmat(image_name)['im_b_y'] + + im_gt_y = im_gt_y.astype(float) + im_b_y = im_b_y.astype(float) + + psnr_bicubic = PSNR(im_gt_y, im_b_y,shave_border=scale) + avg_psnr_bicubic += psnr_bicubic + + im_input = im_b_y/255. + + im_input = Variable(torch.from_numpy(im_input).float()).view(1, -1, im_input.shape[0], im_input.shape[1]) + + if opt.device == 'npu': + loc = 'npu:{}'.format(opt.gpu) + im_input = im_input.to(loc, non_blocking=True).to(torch.float) + else: + im_input = im_input.cuda(opt.gpu, non_blocking=True) + + start_time = time.time() + HR = model(im_input) + elapsed_time = time.time() - start_time + avg_elapsed_time += elapsed_time + + HR = HR.cpu() + + im_h_y = HR.data[0].numpy().astype(np.float32) + + im_h_y = im_h_y * 255. + im_h_y[im_h_y < 0] = 0 + im_h_y[im_h_y > 255.] = 255. 
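                # the network output is the Y channel in [0, 255]; clamp before
                # computing PSNR so out-of-range predictions do not distort the
                # score, then drop the channel axis to get an HxW array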
                im_h_y = im_h_y[0, :, :]

                psnr_predicted = PSNR(im_gt_y, im_h_y, shave_border=scale)
                avg_psnr_predicted += psnr_predicted

        print("Scale=", scale)
        print("Dataset=", opt.valdata)
        print("PSNR_predicted=", avg_psnr_predicted/count)
        print("PSNR_bicubic=", avg_psnr_bicubic/count)
        print("It takes average {}s for processing".format(avg_elapsed_time/count))

if __name__ == "__main__":
    main()


diff --git a/PyTorch/contrib/cv/others/VDSR/models/vdsr.py b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py
new file mode 100644
index 0000000000..1e0b956b4e
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py
@@ -0,0 +1,40 @@
import torch
import torch.nn as nn
from math import sqrt

class Conv_ReLU_Block(nn.Module):
    def __init__(self):
        super(Conv_ReLU_Block, self).__init__()
        self.conv = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.conv(x))

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.residual_layer = self.make_layer(Conv_ReLU_Block, 18)
        self.input = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.output = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

        # He (Kaiming) normal initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, sqrt(2. / n))

    def make_layer(self, block, num_of_layer):
        layers = []
        for _ in range(num_of_layer):
            layers.append(block())
        return nn.Sequential(*layers)

    def forward(self, x):
        residual = x
        out = self.relu(self.input(x))
        out = self.residual_layer(out)
        out = self.output(out)
        out = torch.add(out, residual)
        return out
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/modelzoo_level.txt b/PyTorch/contrib/cv/others/VDSR/modelzoo_level.txt
new file mode 100644
index 0000000000..55a9add9fa
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/modelzoo_level.txt
@@ -0,0 +1,3 @@
FuncStatus:OK
PerfStatus:OK
PrecisionStatus:POK
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/requirements.txt b/PyTorch/contrib/cv/others/VDSR/requirements.txt
new file mode 100644
index 0000000000..cf3531aa47
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/requirements.txt
@@ -0,0 +1,5 @@
scipy
h5py
numpy
# apex means the Ascend-adapted (or NVIDIA) apex build, not the unrelated "apex" package on PyPI;
apex
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/test/env_npu.sh b/PyTorch/contrib/cv/others/VDSR/test/env_npu.sh
new file mode 100644
index 0000000000..099ec5cc8b
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/test/env_npu.sh
@@ -0,0 +1,76 @@
#!/bin/bash
export install_path=/usr/local/Ascend

if [ -d ${install_path}/toolkit ]; then
    export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH}
    export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
    export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH
    export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=${install_path}/opp
else
    if [ -d 
${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=0 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', 
sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh new file mode 100644 index 0000000000..ae83aa4c08 --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size resume RANK_SIZE +# 网络名称,同目录名称 +Network="VDSR" +# 训练batch_size +batch_size=384 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" +# checkpoint文件路径,以实际路径为准 +pth_path="" +# 训练epoch +train_epochs=50 +# 学习率 +learning_rate=0.3 + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +RANK_ID_START=0 +KERNEL_NUM=$(($(nproc)/8)) +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)) +do + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\ + --data_path=${data_path} \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --lr=${learning_rate} \ + --momentum=0.9 \ + --weight-decay=1e-4 \ + --gpu=$RANK_ID \ + --dist-url='tcp://127.0.0.1:50011' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world_size=1 \ + --device='npu' \ + --nEpochs=${train_epochs} \ + --resume ${pth_path} \ + --amp \ + --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done + +# python3.7 ./main.py \ +# --data_path=${data_path} \ +# --addr=$(hostname -I |awk '{print $1}') \ +# --seed=49 \ +# --lr=${learning_rate} \ +# --momentum=0.9 \ +# --weight-decay=0.0001 \ +# --dist-url='tcp://127.0.0.1:50011' \ +# --dist-backend 'hccl' \ +# --multiprocessing-distributed \ +# --world_size=1 \ +# --device='npu' \ +# --nEpochs=${train_epochs} \ +# --resume ${pth_path} \ +# --amp \ +# --batchSize=${batch_size} > 
${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`grep Test ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log | awk '{print $8}' | awk 'END {print}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh new file mode 100644 index 0000000000..c4678540bc --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="VDSR" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +# checkpoint文件路径,以实际路径为准 +pth_path="" +# 训练epoch +train_epochs=50 +# 学习率 +learning_rate=0.1 +# 指定训练所使用的npu device卡id +device_id=0 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
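    # ${cur_path##*/} strips everything up to the last '/', i.e. the directory
    # basename; if the script was launched from test/, step up to the model root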
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +nohup python3.7 ./main.py \ + --data_path=${data_path} \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --lr=${learning_rate} \ + --momentum=0.9 \ + --weight-decay=1e-4 \ + --world_size=1 \ + --device='npu' \ + --gpu=${ASCEND_DEVICE_ID} \ + --dist-url='tcp://127.0.0.1:50021' \ + --dist-backend 'hccl' \ + --nEpochs=${train_epochs} \ + --amp \ + --pretrained=${pth_path} \ + --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git 
a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh new file mode 100644 index 0000000000..c38a0d1f66 --- /dev/null +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh @@ -0,0 +1,145 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="VDSR" +# 训练batch_size +batch_size=128 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练epoch +train_epochs=50 +# 学习率 +learning_rate=0.1 +# 指定训练所使用的npu device卡id +device_id=0 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + nohup python3.7 ./main.py \ + --data_path=${data_path} \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --lr=${learning_rate} \ + --momentum=0.9 \ + --weight-decay=1e-4 \ + --world_size=1 \ + --device='npu' \ + --gpu=${ASCEND_DEVICE_ID} \ + --dist-url='tcp://127.0.0.1:50021' \ + --dist-backend 'hccl' \ + --nEpochs=${train_epochs} \ + --amp \ + --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", 
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
+grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值,不需要修改
+ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh
new file mode 100644
index 0000000000..b1eee630b4
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+################基础配置参数,需要模型审视修改##################
+# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
+# 网络名称,同目录名称
+Network="VDSR"
+# 训练batch_size
+batch_size=384
+# 训练使用的npu卡数
+export RANK_SIZE=8
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+# 训练epoch
+train_epochs=50
+# 学习率
+learning_rate=0.3
+
+
+
+# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值
+for para in $*
+do
+    if [[ $para == --workers* ]];then
+        workers=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +# nohup python3.7 ./main.py \ +# --data_path=${data_path} \ +# --addr=$(hostname -I |awk '{print $1}') \ +# --seed=49 \ +# --lr=${learning_rate} \ +# --momentum=0.9 \ +# --weight-decay=1e-4 \ +# --dist-url='tcp://127.0.0.1:50011' \ +# --dist-backend 'hccl' \ +# --multiprocessing-distributed \ +# --world_size=1 \ +# --device='npu' \ +# --nEpochs=${train_epochs} \ +# --amp \ +# --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + + +RANK_ID_START=0 +KERNEL_NUM=$(($(nproc)/8)) +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)) +do + PID_START=$((KERNEL_NUM * RANK_ID)) + PID_END=$((PID_START + KERNEL_NUM - 1)) + nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\ + --data_path=${data_path} \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --lr=${learning_rate} \ + --momentum=0.9 \ + --weight-decay=1e-4 \ + --gpu=$RANK_ID \ + --dist-url='tcp://127.0.0.1:50011' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world_size=1 \ + --device='npu' \ + --nEpochs=${train_epochs} \ + --amp \ + --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh
new file mode 100644
index 0000000000..3acaea573e
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+################基础配置参数,需要模型审视修改##################
+# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
+# 网络名称,同目录名称
+Network="VDSR"
+# 训练batch_size
+batch_size=128
+# 训练使用的npu卡数
+export RANK_SIZE=1
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+# 训练epoch
+train_epochs=2
+# 学习率
+learning_rate=0.1
+# 指定训练所使用的npu device卡id
+device_id=0
+
+
+# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值
+for para in $*
+do
+    if [[ $para == --device_id* ]];then
+        device_id=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
+if [ $ASCEND_DEVICE_ID ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ ${device_id} ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be config"
+    exit 1
+fi
+
+
+
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + nohup python3.7 ./main.py \ + --data_path=${data_path} \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --lr=${learning_rate} \ + --momentum=0.9 \ + --weight-decay=1e-4 \ + --world_size=1 \ + --device='npu' \ + --gpu=${ASCEND_DEVICE_ID} \ + --dist-url='tcp://127.0.0.1:50021' \ + --dist-backend 'hccl' \ + --nEpochs=${train_epochs} \ + --amp \ + --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git 
a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh
new file mode 100644
index 0000000000..d58feee537
--- /dev/null
+++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+################基础配置参数,需要模型审视修改##################
+# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
+# 网络名称,同目录名称
+Network="VDSR"
+# 训练batch_size
+batch_size=384
+# 训练使用的npu卡数
+export RANK_SIZE=8
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+# 训练epoch
+train_epochs=2
+# 学习率
+learning_rate=0.3
+
+
+
+# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值
+for para in $*
+do
+    if [[ $para == --workers* ]];then
+        workers=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+
+#################创建日志输出目录,不需要修改#################
+ASCEND_DEVICE_ID=0
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+
+#################启动训练脚本#################
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+# 非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
+
+RANK_ID_START=0
+KERNEL_NUM=$(($(nproc)/8))
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++))
+do
+    PID_START=$((KERNEL_NUM * RANK_ID))
+    PID_END=$((PID_START + KERNEL_NUM - 1))
+    nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\
+        --data_path=${data_path} \
+        --addr=$(hostname -I |awk '{print $1}') \
+        --seed=49 \
+        --lr=${learning_rate} \
+        --momentum=0.9 \
+        --weight-decay=1e-4 \
+        --gpu=$RANK_ID \
+        --dist-url='tcp://127.0.0.1:50011' \
+        --dist-backend 'hccl' \
+        --multiprocessing-distributed \
+        --world_size=1 \
+        --device='npu' \
+        --nEpochs=${train_epochs} \
+        --amp \
+        --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+
+# nohup python3.7 ./main.py \
+#     --data_path=${data_path} \
+#     --addr=$(hostname -I |awk '{print $1}') \
+#     --seed=49 \
+#     --lr=${learning_rate} \
+#     --momentum=0.9 \
+#     --weight-decay=1e-4 \
+#     --dist-url='tcp://127.0.0.1:50011' \
+#     --dist-backend 'hccl' \
+#     --multiprocessing-distributed \
+#     --world_size=1 \
+#     --device='npu' \
+#     --nEpochs=${train_epochs} \
+#     --amp \
+#     --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+
+wait
+
+
+##################获取训练数据################
+#训练结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印,不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS,需要模型审视修改
+FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $11}'|awk 'END {print}'`
+#打印,不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep -a '* Acc@1' 
${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file -- Gitee From f7994e413bea605c1f757fd336077fb3b1396d81 Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Thu, 25 Aug 2022 15:36:15 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=A7=84=E8=8C=83?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/models/vdsr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/models/vdsr.py b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py index 1e0b956b4e..258171982f 100644 --- a/PyTorch/contrib/cv/others/VDSR/models/vdsr.py +++ b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py @@ -2,9 +2,9 @@ import torch import torch.nn as nn from math import sqrt -class Conv_ReLU_Block(nn.Module): +class ConvReLUBlock(nn.Module): def __init__(self): - super(Conv_ReLU_Block, self).__init__() + super(ConvReLUBlock, self).__init__() self.conv = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False) self.relu = nn.ReLU(inplace=True) @@ -14,7 +14,7 @@ class Conv_ReLU_Block(nn.Module): class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.residual_layer = self.make_layer(Conv_ReLU_Block, 18) + self.residual_layer = self.make_layer(ConvReLUBlock, 18) self.input = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False) self.output = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False) self.relu = nn.ReLU(inplace=True) @@ -35,6 +35,6 @@ class 
Net(nn.Module): out = self.relu(self.input(x)) out = self.residual_layer(out) out = self.output(out) - out = torch.add(out,residual) + out = torch.add(out, residual) return out \ No newline at end of file -- Gitee From fcacf5cf44c6f446fa587b97c34a77a022b39655 Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Sat, 10 Sep 2022 11:13:56 +0800 Subject: [PATCH 3/7] =?UTF-8?q?=E8=B0=83=E6=95=B4=E4=B8=BA=E5=A4=9A?= =?UTF-8?q?=E7=BA=BF=E7=A8=8B=E8=AF=BB=E5=8F=96=E6=95=B0=E6=8D=AE=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/README.md | 4 +- PyTorch/contrib/cv/others/VDSR/main.py | 68 ++++++------------- .../cv/others/VDSR/test/train_eval_8p.sh | 19 +----- .../cv/others/VDSR/test/train_finetune_1p.sh | 1 + .../cv/others/VDSR/test/train_full_1p.sh | 1 + .../cv/others/VDSR/test/train_full_8p.sh | 18 +---- .../others/VDSR/test/train_performance_1p.sh | 5 +- .../others/VDSR/test/train_performance_8p.sh | 19 +----- 8 files changed, 30 insertions(+), 105 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/README.md b/PyTorch/contrib/cv/others/VDSR/README.md index 5e1a04a96e..48f9ad6722 100644 --- a/PyTorch/contrib/cv/others/VDSR/README.md +++ b/PyTorch/contrib/cv/others/VDSR/README.md @@ -160,9 +160,9 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 | NAME | PSNR_x2 | PSBR_x3 | PSNR_x4 | FPS | Epochs | AMP_Type | | ------- | ----- | ------ | ------ | ----: | ------ | -------: | | 1p-竞品 | 37.07 | 33.35 | 31.10 |2434 | 50 | - | -| 1p-NPU | 37.30 | 33.53 | 31.25 |1435 | 50 | O1 | +| 1p-NPU | 37.30 | 33.53 | 31.25 |4112 | 50 | O1 | | 8p-竞品 | 37.03 | 33.34 | 31.04 |10934 | 50 | - | -| 8p-NPU | 37.17 | 33.42 | 31.10 |4385 | 50 | O1 | +| 8p-NPU | 37.17 | 33.42 | 31.10 |8223 | 50 | O1 | diff --git a/PyTorch/contrib/cv/others/VDSR/main.py b/PyTorch/contrib/cv/others/VDSR/main.py index 8ce6a894f9..5d78400f98 100644 --- a/PyTorch/contrib/cv/others/VDSR/main.py +++ b/PyTorch/contrib/cv/others/VDSR/main.py @@ -29,7 +29,7 @@ from apex.parallel import DistributedDataParallel # Training settings parser = argparse.ArgumentParser(description="PyTorch VDSR") parser.add_argument('--data_path', default='./data/train.h5', type=str,help='path to dataset') -parser.add_argument("--batchSize", type=int, default=384, help="Training batch size") +parser.add_argument("--batchSize", type=int, default=128, help="Training batch size") parser.add_argument("--nEpochs", type=int, default=50, help="Number of epochs to train for") parser.add_argument("--lr", type=float, default=0.1, help="Learning Rate. 
Default=0.1") parser.add_argument("--step", type=int, default=10, help="Sets the learning rate to the initial LR decayed by momentum every n epochs, Default: n=10") @@ -85,10 +85,10 @@ print = flush_print(print) def main(): opt = parser.parse_args() - print(opt.device_list) os.environ['MASTER_ADDR'] = opt.addr os.environ['MASTER_PORT'] = '29777' + os.environ['RANK_SIZE'] = '1' if opt.seed is not None: random.seed(opt.seed) @@ -118,12 +118,10 @@ def main(): ngpus_per_node = torch.cuda.device_count() else: ngpus_per_node = 1 - print('ngpus_per_node:', ngpus_per_node) if opt.multiprocessing_distributed: # Since we have ngpus_per_node processes per node, the total world_size # needs to be adjusted accordingly opt.world_size = ngpus_per_node * opt.world_size - print('opt.world_size:',opt.world_size) # Use torch.multiprocessing.spawn to launch distributed processes: the # main_worker process function if opt.device=='npu': @@ -139,8 +137,6 @@ def main_worker(gpu, ngpus_per_node,opt): opt.gpu = opt.process_device_map[gpu] - if opt.gpu is not None: - print("Use GPU: {} for training".format(opt.gpu)) if opt.distributed: if opt.dist_url == "env://" and opt.rank == -1: @@ -158,16 +154,13 @@ def main_worker(gpu, ngpus_per_node,opt): world_size=opt.world_size, rank=opt.rank) - print("===> Building model") if opt.pretrained: model = Net() if os.path.isfile(opt.pretrained): - print("=> loading model '{}'".format(opt.pretrained)) pretrained_dict = torch.load(opt.pretrained, map_location="cpu")["state_dict"] else: pretrained_dict = torch.load("./checkpoint/model_epoch_50.pth", map_location="cpu")["state_dict"] if "fc.weight" in pretrained_dict: - print("pop fc layer weight") pretrained_dict.pop('fc.weight') pretrained_dict.pop('fc.bias') model.load_state_dict(pretrained_dict, strict=False) @@ -192,7 +185,7 @@ def main_worker(gpu, ngpus_per_node,opt): # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have opt.batchSize = int(opt.batchSize / opt.world_size) - # opt.workers = int((opt.workers + ngpus_per_node - 1) / ngpus_per_node) + opt.workers = int((opt.workers + ngpus_per_node - 1) / ngpus_per_node) else: if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) @@ -201,11 +194,7 @@ def main_worker(gpu, ngpus_per_node,opt): model.cuda() # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set - print("[gpu id:", opt.gpu, "]", - "============================test opt.gpu is not None else==========================") elif opt.gpu is not None: - print("[gpu id:", opt.gpu, "]", - "============================test elif opt.gpu is not None:==========================") if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) torch.npu.set_device(opt.gpu) @@ -215,19 +204,13 @@ def main_worker(gpu, ngpus_per_node,opt): model = model.cuda(opt.gpu) else: # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", opt.gpu, "]", "============================test 1==========================") - print("[gpu id:", opt.gpu, "]", "============================test 3==========================") if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) - else: - print("before : model = torch.nn.DataParallel(model).cuda()") - print("===> Setting Optimizer") optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay) if opt.amp: model, optimizer = amp.initialize(model, optimizer, opt_level=opt.opt_level, loss_scale=opt.loss_scale, 
combine_grad=True) - print("===> Setting DDP") if opt.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, @@ -242,16 +225,9 @@ def main_worker(gpu, ngpus_per_node,opt): else: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu], broadcast_buffers=False) else: - print("[gpu id:", opt.gpu, "]", - "============================test opt.gpu is not None else==========================") model = torch.nn.parallel.DistributedDataParallel(model) - elif opt.gpu is not None: - print("[gpu id:", opt.gpu, "]", - "============================test elif opt.gpu is not None:==========================") else: # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", opt.gpu, "]", "============================test 1==========================") - print("[gpu id:", opt.gpu, "]", "============================test 3==========================") if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) model = torch.nn.DataParallel(model).to(loc) @@ -259,7 +235,6 @@ def main_worker(gpu, ngpus_per_node,opt): model = torch.nn.DataParallel(model).cuda() - print("===> Setting LOSS") if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) criterion = nn.MSELoss(reduction='sum').to(loc) @@ -269,7 +244,6 @@ def main_worker(gpu, ngpus_per_node,opt): # optionally resume from a checkpoint if opt.resume: if os.path.isfile(opt.resume): - print("=> loading checkpoint '{}'".format(opt.resume)) if opt.gpu is None: checkpoint = torch.load(opt.resume) else: @@ -284,40 +258,42 @@ def main_worker(gpu, ngpus_per_node,opt): optimizer.load_state_dict(checkpoint['optimizer']) if opt.amp: amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(opt.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(opt.resume)) cudnn.benchmark = True - print("===> Loading datasets") train_set = DatasetFromHdf5(opt.data_path) if opt.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_set) else: train_sampler = None - training_data_loader = DataLoader(train_set,batch_size=opt.batchSize, - shuffle=(train_sampler is None), - pin_memory=False, - sampler=train_sampler, - drop_last=True) + if opt.device == 'npu': + training_data_loader = DataLoader(train_set,batch_size=opt.batchSize, + num_workers=opt.workers, + shuffle=(train_sampler is None), + pin_memory=False, + sampler=train_sampler, + drop_last=True) + else: + training_data_loader = DataLoader(train_set,batch_size=opt.batchSize, + shuffle=(train_sampler is None), + pin_memory=False, + sampler=train_sampler, + drop_last=True) if opt.prof: profiling(training_data_loader, model, criterion, optimizer, opt) return start_time = time.time() - print("===> Training") for epoch in range(opt.start_epoch, opt.nEpochs+1): if opt.distributed: train_sampler.set_epoch(epoch) train(training_data_loader, optimizer, model, criterion, epoch, opt, ngpus_per_node) if not opt.multiprocessing_distributed or (opt.multiprocessing_distributed and opt.rank % ngpus_per_node == 0): - if (epoch) % 5 != 0: # 5个epoch保存一下,一共保存18个epoch + if (epoch) % 5 != 0: # 5个epoch保存一下,一共保存20个epoch continue ############## npu modify begin ############# @@ -340,7 +316,6 @@ def main_worker(gpu, ngpus_per_node,opt): if not opt.multiprocessing_distributed or (opt.multiprocessing_distributed and opt.rank % ngpus_per_node == 0): - print("===>Testing") test(model,opt) def adjust_learning_rate(optimizer, epoch, opt): 
@@ -348,7 +323,6 @@ def adjust_learning_rate(optimizer, epoch, opt): lr = opt.lr * (0.1 ** (epoch // opt.step)) for param_group in optimizer.param_groups: param_group["lr"] = lr - print("Epoch = {}, lr = {}".format(epoch, optimizer.param_groups[0]["lr"])) def profiling(data_loader, model, criterion, optimizer, opt,): # switch to train mode @@ -399,7 +373,6 @@ def train(training_data_loader, optimizer, model, criterion, epoch, opt, ngpus_p prefix="Epoch: [{}]".format(epoch)) adjust_learning_rate(optimizer, epoch, opt) - # param_groups:[{'params','lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'},{……}] model.train() end = time.time() for iteration, batch in enumerate(training_data_loader, 1): @@ -408,10 +381,8 @@ def train(training_data_loader, optimizer, model, criterion, epoch, opt, ngpus_p if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) - input = input.to(loc, non_blocking=True).to(torch.float) - target = target.to(loc, non_blocking=True).to(torch.float) - # input = input.to(loc, non_blocking=True).to(torch.float) - # target = target.to(torch.float).to(loc, non_blocking=True) + input = input.to(loc, non_blocking=True) + target = target.to(loc, non_blocking=True) else: input = input.cuda(opt.gpu, non_blocking=True) target = target.cuda(opt.gpu, non_blocking=True) @@ -494,7 +465,6 @@ def save_checkpoint(state, epoch): if not os.path.exists("checkpoint/"): os.makedirs("checkpoint/") torch.save(state, model_out_path) - print("Checkpoint saved to {}".format(model_out_path)) def PSNR(pred, gt, shave_border=0): height, width = pred.shape[:2] diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh index ae83aa4c08..5df9b40340 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh @@ -88,6 +88,7 @@ do --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ + --workers=16 \ --gpu=$RANK_ID \ --dist-url='tcp://127.0.0.1:50011' \ --dist-backend 'hccl' \ @@ -99,24 +100,6 @@ do --amp \ --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done - -# python3.7 ./main.py \ -# --data_path=${data_path} \ -# --addr=$(hostname -I |awk '{print $1}') \ -# --seed=49 \ -# --lr=${learning_rate} \ -# --momentum=0.9 \ -# --weight-decay=0.0001 \ -# --dist-url='tcp://127.0.0.1:50011' \ -# --dist-backend 'hccl' \ -# --multiprocessing-distributed \ -# --world_size=1 \ -# --device='npu' \ -# --nEpochs=${train_epochs} \ -# --resume ${pth_path} \ -# --amp \ -# --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - wait diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh index c4678540bc..fa6ce12ec1 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh @@ -94,6 +94,7 @@ nohup python3.7 ./main.py \ --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ + --workers=4 \ --world_size=1 \ --device='npu' \ --gpu=${ASCEND_DEVICE_ID} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh index c38a0d1f66..5a24f2612d 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh @@ -85,6 +85,7 @@ fi --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ + --workers=4 \ 
--world_size=1 \ --device='npu' \ --gpu=${ASCEND_DEVICE_ID} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh index b1eee630b4..a85c20f50b 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh @@ -68,23 +68,6 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -# nohup python3.7 ./main.py \ -# --data_path=${data_path} \ -# --addr=$(hostname -I |awk '{print $1}') \ -# --seed=49 \ -# --lr=${learning_rate} \ -# --momentum=0.9 \ -# --weight-decay=1e-4 \ -# --dist-url='tcp://127.0.0.1:50011' \ -# --dist-backend 'hccl' \ -# --multiprocessing-distributed \ -# --world_size=1 \ -# --device='npu' \ -# --nEpochs=${train_epochs} \ -# --amp \ -# --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - - RANK_ID_START=0 KERNEL_NUM=$(($(nproc)/8)) for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)) @@ -98,6 +81,7 @@ do --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ + --workers=16 \ --gpu=$RANK_ID \ --dist-url='tcp://127.0.0.1:50011' \ --dist-backend 'hccl' \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh index 3acaea573e..e5411e9e56 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh @@ -12,11 +12,11 @@ export RANK_SIZE=1 data_path="" # 训练epoch -train_epochs=2 +train_epochs=4 # 学习率 learning_rate=0.1 # 指定训练所使用的npu device卡id -device_id=0 +device_id=4 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 @@ -84,6 +84,7 @@ fi --seed=49 \ --lr=${learning_rate} \ --momentum=0.9 \ + --workers=4 \ --weight-decay=1e-4 \ --world_size=1 \ --device='npu' \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh index d58feee537..78e944a9fa 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=8 data_path="" # 训练epoch -train_epochs=2 +train_epochs=4 # 学习率 learning_rate=0.3 @@ -81,6 +81,7 @@ do --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ + --workers=16 \ --gpu=$RANK_ID \ --dist-url='tcp://127.0.0.1:50011' \ --dist-backend 'hccl' \ @@ -92,22 +93,6 @@ do --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done -# nohup python3.7 ./main.py \ -# --data_path=${data_path} \ -# --addr=$(hostname -I |awk '{print $1}') \ -# --seed=49 \ -# --lr=${learning_rate} \ -# --momentum=0.9 \ -# --weight-decay=1e-4 \ -# --dist-url='tcp://127.0.0.1:50011' \ -# --dist-backend 'hccl' \ -# --multiprocessing-distributed \ -# --world_size=1 \ -# --device='npu' \ -# --nEpochs=${train_epochs} \ -# --amp \ -# --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - wait -- Gitee From 6016553c91bbe5fa0282a6005d0d766178b87242 Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Thu, 15 Sep 2022 14:30:00 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E6=B7=BB=E5=8A=A0license?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/LICENSE | 1 + PyTorch/contrib/cv/others/VDSR/README.md | 5 +++-- 
PyTorch/contrib/cv/others/VDSR/dataset.py | 15 +++++++++++++++ PyTorch/contrib/cv/others/VDSR/main.py | 18 ++++++++++++++++-- PyTorch/contrib/cv/others/VDSR/models/vdsr.py | 15 +++++++++++++++ .../others/VDSR/test/train_performance_8p.sh | 1 - 6 files changed, 50 insertions(+), 5 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/LICENSE b/PyTorch/contrib/cv/others/VDSR/LICENSE index 04f3814c94..7ba87e0766 100644 --- a/PyTorch/contrib/cv/others/VDSR/LICENSE +++ b/PyTorch/contrib/cv/others/VDSR/LICENSE @@ -3,6 +3,7 @@ The MIT License (MIT) Copyright (c) 2017- Jiu XU Copyright (c) 2017- Rakuten, Inc Copyright (c) 2017- Rakuten Institute of Technology +Copyright 2021 Huawei Technologies Co., Ltd Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/PyTorch/contrib/cv/others/VDSR/README.md b/PyTorch/contrib/cv/others/VDSR/README.md index 48f9ad6722..a5b2ba6e64 100644 --- a/PyTorch/contrib/cv/others/VDSR/README.md +++ b/PyTorch/contrib/cv/others/VDSR/README.md @@ -129,7 +129,8 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 ``` 公共参数: - --data_path //数据集路径 + --data_path //数据集路径 + --workers //读取数据集线程数 --addr //主机地址 --Epoch //重复训练次数 --batchSize //训练批次大小 @@ -162,7 +163,7 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 | 1p-竞品 | 37.07 | 33.35 | 31.10 |2434 | 50 | - | | 1p-NPU | 37.30 | 33.53 | 31.25 |4112 | 50 | O1 | | 8p-竞品 | 37.03 | 33.34 | 31.04 |10934 | 50 | - | -| 8p-NPU | 37.17 | 33.42 | 31.10 |8223 | 50 | O1 | +| 8p-NPU | 37.17 | 33.42 | 31.10 |6334 | 50 | O1 | diff --git a/PyTorch/contrib/cv/others/VDSR/dataset.py b/PyTorch/contrib/cv/others/VDSR/dataset.py index 125e12552f..a523e19179 100644 --- a/PyTorch/contrib/cv/others/VDSR/dataset.py +++ b/PyTorch/contrib/cv/others/VDSR/dataset.py @@ -1,3 +1,18 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + import torch.utils.data as data import torch import h5py diff --git a/PyTorch/contrib/cv/others/VDSR/main.py b/PyTorch/contrib/cv/others/VDSR/main.py index 5d78400f98..45b0054fdb 100644 --- a/PyTorch/contrib/cv/others/VDSR/main.py +++ b/PyTorch/contrib/cv/others/VDSR/main.py @@ -1,3 +1,18 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + import argparse import os import warnings @@ -88,7 +103,6 @@ def main(): os.environ['MASTER_ADDR'] = opt.addr os.environ['MASTER_PORT'] = '29777' - os.environ['RANK_SIZE'] = '1' if opt.seed is not None: random.seed(opt.seed) @@ -185,7 +199,7 @@ def main_worker(gpu, ngpus_per_node,opt): # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have opt.batchSize = int(opt.batchSize / opt.world_size) - opt.workers = int((opt.workers + ngpus_per_node - 1) / ngpus_per_node) + # opt.workers = int((opt.workers + ngpus_per_node - 1) / ngpus_per_node) else: if opt.device == 'npu': loc = 'npu:{}'.format(opt.gpu) diff --git a/PyTorch/contrib/cv/others/VDSR/models/vdsr.py b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py index 258171982f..60d806cfed 100644 --- a/PyTorch/contrib/cv/others/VDSR/models/vdsr.py +++ b/PyTorch/contrib/cv/others/VDSR/models/vdsr.py @@ -1,3 +1,18 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + import torch import torch.nn as nn from math import sqrt diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh index 78e944a9fa..3dd326f801 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh @@ -92,7 +92,6 @@ do --amp \ --batchSize=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done - wait -- Gitee From a626834e1ae6e39a7977cf7dd15e35b0a6d14d4e Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Thu, 8 Dec 2022 18:34:59 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E9=9B=86=E8=B7=AF=E5=BE=84=E4=BD=9C=E4=B8=BA=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/README.md | 14 ++++++++------ .../contrib/cv/others/VDSR/test/train_eval_8p.sh | 7 ++++++- .../cv/others/VDSR/test/train_finetune_1p.sh | 5 +++++ .../contrib/cv/others/VDSR/test/train_full_1p.sh | 8 ++++++-- .../contrib/cv/others/VDSR/test/train_full_8p.sh | 6 +++++- .../cv/others/VDSR/test/train_performance_1p.sh | 10 +++++++--- .../cv/others/VDSR/test/train_performance_8p.sh | 6 +++++- 7 files changed, 42 insertions(+), 14 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/README.md b/PyTorch/contrib/cv/others/VDSR/README.md index a5b2ba6e64..4f2cbed4d8 100644 --- a/PyTorch/contrib/cv/others/VDSR/README.md +++ b/PyTorch/contrib/cv/others/VDSR/README.md @@ -97,13 +97,13 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 ``` # training 1p accuracy - bash ./test/train_full_1p.sh --data_path=real_data_path + bash ./test/train_full_1p.sh --data_path=real_data_path --valdata=valdata_path # training 1p performance - bash 
./test/train_performance_1p.sh --data_path=real_data_path + bash ./test/train_performance_1p.sh --data_path=real_data_path --valdata=valdata_path # finetuning 1p - bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash test/train_finetune_1p.sh --data_path=real_data_path --valdata=valdata_path --pth_path=real_pre_train_model_path ``` @@ -113,16 +113,17 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 ``` # training 8p accuracy - bash ./test/train_full_8p.sh --data_path=real_data_path + bash ./test/train_full_8p.sh --data_path=real_data_path --valdata=valdata_path # training 8p performance - bash ./test/train_performance_8p.sh --data_path=real_data_path + bash ./test/train_performance_8p.sh --data_path=real_data_path --valdata=valdata_path #test 8p accuracy - bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + bash test/train_eval_8p.sh --data_path=real_data_path --valdata=valdata_path --pth_path=real_pre_train_model_path ``` --data\_path参数填写数据集路径。默认为./data/train.h5 + --valdata参数填写测试集路径。默认为./Set5_mat --pth\_path参数填写预训练模型路径。默认为./checkpoint/model_epoch_50.pth 模型训练脚本参数说明如下。 @@ -130,6 +131,7 @@ VDSR是一个经典的超分模型,使用了一种非常深的深度学习模 ``` 公共参数: --data_path //数据集路径 + --valdata //测试集路径 --workers //读取数据集线程数 --addr //主机地址 --Epoch //重复训练次数 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh index 5df9b40340..77a8de4a62 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh @@ -10,6 +10,8 @@ batch_size=384 export RANK_SIZE=8 # 数据集路径,保持为空,不需要修改 data_path="" +# 测试集路径,保持为空,不需要修改 +valdata="" # checkpoint文件路径,以实际路径为准 pth_path="" # 训练epoch @@ -28,6 +30,8 @@ do data_path=`echo ${para#*=}` elif [[ $para == --pth_path* ]];then pth_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` fi done @@ -83,12 +87,13 @@ do PID_END=$((PID_START + KERNEL_NUM - 1)) nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ --lr=${learning_rate} \ --momentum=0.9 \ --weight-decay=1e-4 \ - --workers=16 \ + --workers=4 \ --gpu=$RANK_ID \ --dist-url='tcp://127.0.0.1:50011' \ --dist-backend 'hccl' \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh index fa6ce12ec1..ff3b02dde5 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh @@ -10,6 +10,8 @@ batch_size=128 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 data_path="" +# 测试集路径,保持为空,不需要修改 +valdata="" # checkpoint文件路径,以实际路径为准 pth_path="" # 训练epoch @@ -26,6 +28,8 @@ do device_id=`echo ${para#*=}` elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` elif [[ $para == --pth_path* ]];then pth_path=`echo ${para#*=}` fi @@ -89,6 +93,7 @@ if [ x"${etp_flag}" != x"true" ];then fi nohup python3.7 ./main.py \ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ --lr=${learning_rate} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh index 5a24f2612d..9190171505 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh @@ 
-10,13 +10,14 @@ batch_size=128 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 data_path="" - +# 测试集路径,保持为空,不需要修改 +valdata="" # 训练epoch train_epochs=50 # 学习率 learning_rate=0.1 # 指定训练所使用的npu device卡id -device_id=0 +device_id=4 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 @@ -26,6 +27,8 @@ do device_id=`echo ${para#*=}` elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` fi done @@ -80,6 +83,7 @@ if [ x"${etp_flag}" != x"true" ];then fi nohup python3.7 ./main.py \ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ --lr=${learning_rate} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh index a85c20f50b..7ae9496899 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh @@ -10,7 +10,8 @@ batch_size=384 export RANK_SIZE=8 # 数据集路径,保持为空,不需要修改 data_path="" - +# 测试集路径,保持为空,不需要修改 +valdata="" # 训练epoch train_epochs=50 # 学习率 @@ -25,6 +26,8 @@ do workers=`echo ${para#*=}` elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` fi done @@ -76,6 +79,7 @@ do PID_END=$((PID_START + KERNEL_NUM - 1)) nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ --lr=${learning_rate} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh index e5411e9e56..7b0a42bbed 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh @@ -10,13 +10,14 @@ batch_size=128 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 data_path="" - +# 测试集路径,保持为空,不需要修改 +valdata="" # 训练epoch train_epochs=4 # 学习率 learning_rate=0.1 # 指定训练所使用的npu device卡id -device_id=4 +device_id=0 # 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 @@ -26,6 +27,8 @@ do device_id=`echo ${para#*=}` elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` fi done @@ -80,12 +83,13 @@ if [ x"${etp_flag}" != x"true" ];then fi nohup python3.7 ./main.py \ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ --lr=${learning_rate} \ --momentum=0.9 \ - --workers=4 \ --weight-decay=1e-4 \ + --workers=4 \ --world_size=1 \ --device='npu' \ --gpu=${ASCEND_DEVICE_ID} \ diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh index 3dd326f801..d686db0b4c 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh @@ -10,7 +10,8 @@ batch_size=384 export RANK_SIZE=8 # 数据集路径,保持为空,不需要修改 data_path="" - +# 测试集路径,保持为空,不需要修改 +valdata="" # 训练epoch train_epochs=4 # 学习率 @@ -25,6 +26,8 @@ do workers=`echo ${para#*=}` elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --valdata* ]];then + valdata=`echo ${para#*=}` fi done @@ -76,6 +79,7 @@ do PID_END=$((PID_START + KERNEL_NUM - 1)) nohup taskset -c $PID_START-$PID_END python3.7 ./main.py -j ${KERNEL_NUM}\ --data_path=${data_path} \ + --valdata=${valdata} \ --addr=$(hostname -I |awk '{print $1}') \ --seed=49 \ 
--lr=${learning_rate} \ -- Gitee From ae9052087123a4540455f316ca783846817f01d3 Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Tue, 13 Dec 2022 21:13:07 +0800 Subject: [PATCH 6/7] =?UTF-8?q?!1554=20[=E8=A5=BF=E5=AE=89=E4=BA=A4?= =?UTF-8?q?=E9=80=9A=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=95=88=E8=B4=A1?= =?UTF-8?q?=E7=8C=AE][PyTorch][VDSR]-=E6=89=93=E5=8D=B0=E7=B2=BE=E5=BA=A6?= =?UTF-8?q?=E5=88=B0=E4=B8=BB=E9=A1=B5=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh | 6 +++--- PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh | 4 ++-- PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh | 4 ++-- PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh | 4 ++-- PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh | 4 ++-- PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh index 77a8de4a62..b0eea76f85 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_eval_8p.sh @@ -116,10 +116,10 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" # 训练用例信息,不需要修改 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh index ff3b02dde5..072be0a056 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh @@ -126,9 +126,9 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh index 9190171505..19b095e6a1 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh @@ -114,9 +114,9 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " 
'{print $1}'` +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh index 7ae9496899..58683f6448 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh @@ -112,9 +112,9 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh index 7b0a42bbed..066002dc10 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh @@ -114,9 +114,9 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh index d686db0b4c..95c32fa64a 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh @@ -112,9 +112,9 @@ FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D echo "Final Performance images/sec : $FPS" #输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "PSNR_predicted=" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a 'PSNR_predicted' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "PSNR_predicted=" 'NR==1{print $NF}'|awk -F " " '{print $1}'` #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" +echo "Final 2xPSNR_predicted : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 -- Gitee From 42fefc1b8c268757447cdd965385b193c75e60bb Mon Sep 17 00:00:00 2001 From: yxl0321 <2565963290@qq.com> Date: Thu, 15 Dec 2022 19:14:39 +0800 Subject: [PATCH 7/7] 
=?UTF-8?q?!1554=20[=E8=A5=BF=E5=AE=89=E4=BA=A4?= =?UTF-8?q?=E9=80=9A=E5=A4=A7=E5=AD=A6][=E9=AB=98=E6=95=88=E8=B4=A1?= =?UTF-8?q?=E7=8C=AE][PyTorch][VDSR]-=E4=BF=AE=E6=94=B9=E6=89=93=E5=8D=B0?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh | 2 +- PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh | 2 +- PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh | 2 +- PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh | 2 +- PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh index 072be0a056..e668449b8b 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_finetune_1p.sh @@ -144,7 +144,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh index 19b095e6a1..882669a879 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_1p.sh @@ -132,7 +132,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh index 58683f6448..e8189c2aa3 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_full_8p.sh @@ -130,7 +130,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | 
awk -F " " '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh index 066002dc10..14566a12e5 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_1p.sh @@ -132,7 +132,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` diff --git a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh index 95c32fa64a..bee61e34a3 100644 --- a/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/others/VDSR/test/train_performance_8p.sh @@ -130,7 +130,7 @@ ActualFPS=${FPS} TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` #从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep Epoch ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt #最后一个迭代loss值,不需要修改 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -- Gitee