From ea68bd0de2fcd35de8e20c94fd8e24acc238f148 Mon Sep 17 00:00:00 2001
From: 周昊 <11116189+Oliver-H-Chow@user.noreply.gitee.com>
Date: Tue, 7 Jun 2022 07:35:10 +0000
Subject: [PATCH] Delete file
 PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_1p.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_8p.py
Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README.md

[Xi'an Jiaotong University] [University Contribution] [PyTorch] WideResNet101_2_for_Pytorch initial commit

# WideResNet101_2_for_Pytorch

- Reference implementation:

```
url=https://github.com/pytorch/examples/tree/master/imagenet
branch=master
commit_id=0487749de2fd36f01f4f7f5877b5c9a28ec1fa7f
```

- Accuracy and performance

| Torch | Acc@1 | FPS | Npu_nums | Epochs |
| :------: | :------: | :------: | :------: | :------: |
| 1.8 | - | 394 | 1 | 1 |
| 1.8 | 78.625 | 2929 | 8 | 90 |
| 1.5 | - | 386 | 1 | 1 |
| 1.5 | 78.627 | 3109 | 8 | 90 |

# Self-verification report

```shell
# 1p train perf
# Verify that the performance log file is produced correctly
bash train_performance_1p.sh
# Acceptance result: OK

# 8p train perf
# Verify that the performance log file is produced correctly
bash train_performance_8p.sh
# Acceptance result: OK

# 1p train full
# Verify that the performance/accuracy log file is produced and the model file is saved correctly
bash train_full_1p.sh
# Acceptance result: OK
# Remarks: record missing output logs, runtime failures, error logs, etc.

# 8p train full
# Verify that the performance/accuracy log file is produced and the model file is saved correctly
bash train_full_8p.sh
# Acceptance result: OK
# Remarks: record missing output logs, runtime failures, error logs, etc.
```

Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_1p.sh
Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_8p.sh
Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_8p.sh
Delete file PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_1p.sh
---
 .../WideResNet101_2_for_Pytorch/README.md     | 11 ++-
 .../WideResNet101_2_for_Pytorch/README_raw.md | 80 +++++++++++++++++++
 .../main_npu_1p.py                            | 44 +++++-----
 .../main_npu_8p.py                            | 48 ++++++-----
 .../test/train_full_1p.sh                     |  4 +-
 .../test/train_full_8p.sh                     |  4 +-
 .../test/train_performance_1p.sh              |  7 +-
 .../test/train_performance_8p.sh              |  4 +-
 8 files changed, 146 insertions(+), 56 deletions(-)
 create mode 100644 PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README_raw.md

diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README.md b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README.md
index 1fad48e94b..479b854df0 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README.md
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README.md
@@ -42,10 +42,12 @@ bash ./test/train_eval_8p.sh --data_path=real_data_path
 
 ## WideResnet101_2 training result
 
-| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type |
-| :------: | :------: | :------: | :------: | :------: |
-| - | 386 | 1 | 1 | O2 |
-| 78.627 | 3109 | 8 | 90 | O2 |
+| Torch | Acc@1 | FPS | Npu_nums | Epochs | AMP_Type |
+| :------: | :------: | :------: | :------: | :------: | :------: |
+| 1.8 | - | 394 | 1 | 1 | O2 |
+| 1.8 | 78.625 | 2929 | 8 | 90 | O2 |
+| 1.5 | - | 386 | 1 | 1 | O2 |
+| 1.5 | 78.627 | 3109 | 8 | 90 | O2 |
 
 ```
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README_raw.md b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README_raw.md
new file mode 100644
index 0000000000..1fad48e94b
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/README_raw.md
@@ -0,0 +1,80 @@
+# WideResnet101_2
+
+This implements training of WideResnet101_2 on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/pytorch/examples/tree/master/imagenet).
+
+## WideResnet101_2 Detail
+
+As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. Therefore, WideResnet101_2 is re-implemented using semantics such as custom OPs.
+
+## Requirements
+
+- Install PyTorch ([pytorch.org](http://pytorch.org))
+- `pip install -r requirements.txt`
+- Download the ImageNet dataset from http://www.image-net.org/
+  - Then move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh)
+
+## Training
+
+To train a model, run `main_npu_1p.py` or `main_npu_8p.py` with the desired model architecture and the path to the ImageNet dataset:
+
+```bash
+# training 1p accuracy
+bash ./test/train_full_1p.sh --data_path=real_data_path
+
+# training 1p performance
+bash ./test/train_performance_1p.sh --data_path=real_data_path
+
+# training 8p accuracy
+bash ./test/train_full_8p.sh --data_path=real_data_path
+
+# training 8p performance
+bash ./test/train_performance_8p.sh --data_path=real_data_path
+```
+
+Log path:
+    test/output/device_id/train_${device_id}.log                  # training detail log
+    test/output/device_id/WideResnet101_2_bs8192_8p_perf.log      # 8p training performance result log
+    test/output/device_id/WideResnet101_2_bs8192_8p_acc.log       # 8p training accuracy result log
+
+```bash
+# eval default 8p, should support 1p
+bash ./test/train_eval_8p.sh --data_path=real_data_path
+```
+
+## WideResnet101_2 training result
+
+| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type |
+| :------: | :------: | :------: | :------: | :------: |
+| - | 386 | 1 | 1 | O2 |
+| 78.627 | 3109 | 8 | 90 | O2 |
+
+## Inference
+
+Download the MindX SDK development kit (https://www.hiascend.com/software/mindx-sdk/sdk-detail), version 2.0.2. Then build the inference image and start the Docker container:
+```
+docker build -t infer_image --build-arg FROM_IMAGE_NAME=base_image:tag --build-arg SDK_PKG=sdk_pkg
+bash docker_start_infer.sh docker_image model_dir
+```
+
+# mxbase
+Configure the environment variables and modify the label_file and offline-inference model paths in opencv.cpp. Then build the program and start inference:
+```
+bash build.sh
+./wideresnet [val_image_path]
+```
+Calculate the inference accuracy:
+```
+python3.7 classification_task_metric.py result/ ../../data/config/val_label.txt . ./result.json
+cat result.json
+```
+
+# sdk
+Run `python main.py --help` to view the parameter details and modify them accordingly.
+Then start inference and calculate the inference accuracy:
+```
+bash run.sh ../../data/input/result
+python3.7 classification_task_metric.py result/ ../../data/config/val_label.txt . ./result.json
+cat result.json
+```
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_1p.py b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_1p.py
index 0857a3b661..f640bd8764 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_1p.py
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_1p.py
@@ -24,6 +24,10 @@ import torch.npu
 from apex import amp
 
 import torch
+
+if torch.__version__ >= "1.8.1":
+    import torch_npu
+
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
@@ -41,18 +45,18 @@ from models import resnet_0_6_0
 CALCULATE_DEVICE = "npu:0"
 
 model_names = sorted(name for name in models.__dict__
-    if name.islower() and not name.startswith("__")
-    and callable(models.__dict__[name]))
+                     if name.islower() and not name.startswith("__")
+                     and callable(models.__dict__[name]))
 
 parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('data', metavar='DIR',
+parser.add_argument('data', metavar='DIR', default="/opt/npu/imagenet",
                     help='path to dataset')
 parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                     choices=model_names,
                     help='model architecture: ' +
-    ' | '.join(model_names) +
-    ' (default: resnet18)')
-parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+                    ' | '.join(model_names) +
+                    ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=128, type=int, metavar='N',
                     help='number of data loading workers (default: 4)')
 parser.add_argument('--epochs', default=90, type=int, metavar='N',
                     help='number of total epochs to run')
@@ -105,8 +109,8 @@ parser.add_argument('--amp', default=False, action='store_true',
                     help='use amp to train the model')
 parser.add_argument('--warm_up_epochs', default=0, type=int,
                     help='warm up')
-parser.add_argument('--loss-scale', default=1024., type=float,
-                    help='loss scale using in amp, default -1 means dynamic')
+parser.add_argument('--loss-scale', default='dynamic', type=str,
+                    help='loss scale used in amp, default dynamic')
 parser.add_argument('--opt-level', default='O2', type=str,
                     help='loss scale using in amp, default -1 means dynamic')
 parser.add_argument('--prof', default=False, action='store_true',
@@ -130,7 +134,6 @@ parser.add_argument('--model_url',
 parser.add_argument('--onnx', default=True, action='store_true',
                     help="convert pth model to onnx")
 
-
 cur_step = 0
 CACHE_TRAINING_URL = "/cache/training/"
 CACHE_DATA_URL = "/cache/data_url"
@@ -222,7 +225,7 @@ def main_worker(gpu, ngpus_per_node, args):
         pretrained_dict.pop('fc.bias')
         for param in model.parameters():
             param.requires_grad = False
-        model.fc = nn.Linear(2048,1000)
+        model.fc = nn.Linear(2048, 1000)
     else:
         print("=> creating model wide_resnet101_2")
         model = resnet_0_6_0.wide_resnet101_2()
@@ -232,7 +235,7 @@ def main_worker(gpu, ngpus_per_node, args):
     # elif args.distributed:
     ###### modify npu_p1 2######
     if args.distributed:
-    ###### modify npu_p1 2 end ######
+        ###### modify npu_p1 2 end ######
         # For multiprocessing distributed, DistributedDataParallel constructor
         # should always set the single device scope, otherwise,
         # DistributedDataParallel will use all available devices.
@@ -271,9 +274,9 @@ def main_worker(gpu, ngpus_per_node, args):
     criterion = nn.CrossEntropyLoss().to(CALCULATE_DEVICE)
     ############## npu modify 4 end #############
     optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr,
-        momentum=args.momentum,
-        nesterov=True,
-        weight_decay=args.weight_decay)
+                                            momentum=args.momentum,
+                                            nesterov=True,
+                                            weight_decay=args.weight_decay)
     ###### modify 1 ######
     if args.amp:
         model, optimizer = amp.initialize(
@@ -370,7 +373,7 @@ def main_worker(gpu, ngpus_per_node, args):
         best_acc1 = max(acc1, best_acc1)
 
         if not args.multiprocessing_distributed or (args.multiprocessing_distributed
-            and args.rank % ngpus_per_node == 0):
+                and args.rank % ngpus_per_node == 0):
             save_checkpoint({
                 'epoch': epoch + 1,
                 'arch': args.arch,
@@ -420,6 +423,7 @@ def profiling(data_loader, model, criterion, optimizer, args):
 
     prof.export_chrome_trace("output.prof")
 
+
 def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node):
     batch_time = AverageMeter('Time', ':6.3f')
     data_time = AverageMeter('Data', ':6.3f')
@@ -454,7 +458,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node
         loss = criterion(output, target)
 
         # measure accuracy and record loss
-        acc1, acc5 = accuracy(output, target, topk=(1, 5))# pylint: disable=unbalanced-tuple-unpacking
+        acc1, acc5 = accuracy(output, target, topk=(1, 5))  # pylint: disable=unbalanced-tuple-unpacking
         losses.update(loss.item(), images.size(0))
         top1.update(acc1[0], images.size(0))
         top5.update(acc5[0], images.size(0))
@@ -475,10 +479,10 @@ def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node
         batch_time.update(time.time() - end)
         end = time.time()
 
-        ###### modify 4 ###### 
+        ###### modify 4 ######
         if i % args.print_freq == 0:
             if not args.multiprocessing_distributed or (args.multiprocessing_distributed
-                and args.rank % ngpus_per_node == 0):
+                    and args.rank % ngpus_per_node == 0):
                 progress.display(i)
 
     if not args.multiprocessing_distributed or (args.multiprocessing_distributed
@@ -495,7 +499,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node
 
 def validate(val_loader, model, criterion, args):
     ###### modify 5 ######
-    batch_time = AverageMeter('Time', ':6.3f', start_count_index= 5)
+    batch_time = AverageMeter('Time', ':6.3f', start_count_index=5)
     ###### modify 5 end ######
     losses = AverageMeter('Loss', ':.4e')
     top1 = AverageMeter('Acc@1', ':6.2f')
@@ -523,7 +527,7 @@ def validate(val_loader, model, criterion, args):
             loss = criterion(output, target)
 
             # measure accuracy and record loss
-            acc1, acc5 = accuracy(output, target, topk=(1, 5))# pylint: disable=unbalanced-tuple-unpacking
+            acc1, acc5 = accuracy(output, target, topk=(1, 5))  # pylint: disable=unbalanced-tuple-unpacking
             losses.update(loss.item(), images.size(0))
             top1.update(acc1[0], images.size(0))
             top5.update(acc5[0], images.size(0))
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_8p.py b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_8p.py
index 0f55c67c7b..1b38bf62a3 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_8p.py
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/main_npu_8p.py
@@ -25,6 +25,9 @@ from apex import amp
 import math
 
 import torch
+
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
@@ -40,18 +43,18 @@ import models
 from models import resnet_0_6_0
 
 model_names = sorted(name for name in models.__dict__
-    if name.islower() and not name.startswith("__")
-    and callable(models.__dict__[name]))
+                     if name.islower() and not name.startswith("__")
+                     and callable(models.__dict__[name]))
 
 parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('data', metavar='DIR',
+parser.add_argument('data', metavar='DIR', default="/opt/npu/imagenet",
                     help='path to dataset')
 parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                     choices=model_names,
                     help='model architecture: ' +
-    ' | '.join(model_names) +
-    ' (default: resnet18)')
-parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+                    ' | '.join(model_names) +
+                    ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=128, type=int, metavar='N',
                     help='number of data loading workers (default: 4)')
 parser.add_argument('--epochs', default=90, type=int, metavar='N',
                     help='number of total epochs to run')
@@ -103,8 +106,8 @@ parser.add_argument('--amp', default=False, action='store_true',
                     help='use amp to train the model')
 parser.add_argument('--warm_up_epochs', default=0, type=int,
                     help='warm up')
-parser.add_argument('--loss-scale', default=1024., type=float,
-                    help='loss scale using in amp, default -1 means dynamic')
+parser.add_argument('--loss-scale', default='dynamic', type=str,
+                    help='loss scale used in amp, default dynamic')
 parser.add_argument('--opt-level', default='O2', type=str,
                     help='loss scale using in amp, default -1 means dynamic')
 parser.add_argument('--prof', default=False, action='store_true',
@@ -114,6 +117,7 @@ parser.add_argument('--save_path', default='', type=str,
 
 best_acc1 = 0
 
+
 def device_id_to_process_device_map(device_list):
     devices = device_list.split(",")
     devices = [int(x) for x in devices]
@@ -125,6 +129,7 @@ def device_id_to_process_device_map(device_list):
 
     return process_device_map
 
+
 def main():
     args = parser.parse_args()
 
@@ -205,7 +210,7 @@ def main_worker(gpu, ngpus_per_node, args):
         model.load_state_dict(pretrained_dict, strict=False)
         for param in model.parameters():
             param.requires_grad = False
-        model.fc = nn.Linear(2048,1000)
+        model.fc = nn.Linear(2048, 1000)
     else:
         print("=> creating model wide_resnet101_2")
         model = resnet_0_6_0.wide_resnet101_2()
@@ -220,7 +225,6 @@ def main_worker(gpu, ngpus_per_node, args):
             args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
     ############## npu modify end #############
 
-
     # Data loading code
     traindir = os.path.join(args.data, 'train')
     valdir = os.path.join(args.data, 'val')
@@ -259,9 +263,9 @@ def main_worker(gpu, ngpus_per_node, args):
     # define loss function (criterion) and optimizer
     criterion = nn.CrossEntropyLoss().to(loc)
     optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr,
-        momentum=args.momentum,
-        nesterov=True,
-        weight_decay=args.weight_decay)
+                                            momentum=args.momentum,
+                                            nesterov=True,
+                                            weight_decay=args.weight_decay)
 
     if args.amp:
         model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)
@@ -277,7 +281,7 @@ def main_worker(gpu, ngpus_per_node, args):
             model.load_state_dict(checkpoint['state_dict'])
             optimizer.load_state_dict(checkpoint['optimizer'])
             if args.amp:
-              amp.load_state_dict(checkpoint['amp'])
+                amp.load_state_dict(checkpoint['amp'])
             print("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch']))
         else:
@@ -310,14 +314,14 @@ def main_worker(gpu, ngpus_per_node, args):
         best_acc1 = max(acc1, best_acc1)
 
         if not args.multiprocessing_distributed or (args.multiprocessing_distributed
-            and args.rank % ngpus_per_node == 0):
+                and args.rank % ngpus_per_node == 0):
             if args.amp:
                 save_checkpoint({
                     'epoch': epoch + 1,
                     'arch': args.arch,
                     'state_dict': model.state_dict(),
                     'best_acc1': best_acc1,
-                    'optimizer' : optimizer.state_dict(),
+                    'optimizer': optimizer.state_dict(),
                     'amp': amp.state_dict(),
                 }, is_best)
             else:
@@ -326,7 +330,7 @@ def main_worker(gpu, ngpus_per_node, args):
                     'arch': args.arch,
                     'state_dict': model.state_dict(),
                     'best_acc1': best_acc1,
-                    'optimizer' : optimizer.state_dict(),
+                    'optimizer': optimizer.state_dict(),
                 }, is_best)
 
 
@@ -367,6 +371,7 @@ def profiling(data_loader, model, criterion, optimizer, args):
 
     prof.export_chrome_trace("output.prof")
 
+
 def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node):
     batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
@@ -416,7 +421,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node
         batch_time.update(time.time() - end)
         end = time.time()
 
-        ###### modify 4 ###### 
+        ###### modify 4 ######
         if i % args.print_freq == 0:
             if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                     and args.rank % ngpus_per_node == 0):
@@ -433,7 +438,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node
 
 def validate(val_loader, model, criterion, args, ngpus_per_node):
     ###### modify 5 ######
-    batch_time = AverageMeter('Time', ':6.3f', start_count_index= 5)
+    batch_time = AverageMeter('Time', ':6.3f', start_count_index=5)
     ###### modify 5 end ######
     losses = AverageMeter('Loss', ':.4e')
     top1 = AverageMeter('Acc@1', ':6.2f')
@@ -472,11 +477,10 @@ def validate(val_loader, model, criterion, args, ngpus_per_node):
                     and args.rank % ngpus_per_node == 0):
                 progress.display(i)
 
-
     if not args.multiprocessing_distributed or (args.multiprocessing_distributed
             and args.rank % ngpus_per_node == 0):
         print("[npu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
-            .format(top1=top1, top5=top5))
+              .format(top1=top1, top5=top5))
 
     return top1.avg
 
@@ -572,4 +576,4 @@ def accuracy(output, target, topk=(1,)):
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_1p.sh
index 428550171f..ad1c12715b 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_1p.sh
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_1p.sh
@@ -111,16 +111,16 @@ python3.7 -u ./main_npu_1p.py \
     "${data_path}" \
     --lr=0.2 \
     --print-freq=10 \
+    --workers=128 \
     --epochs=${train_epochs} \
     --amp \
-    --loss-scale=128.0 \
+    --loss-scale='dynamic' \
     --opt-level='O2' \
     --device='npu' \
     --world-size=1 \
     --npu=${ASCEND_DEVICE_ID} \
     --save_path=${test_path_dir}/train_1p_${start_time} \
     --batch-size=${batch_size} > ${test_path_dir}/train_1p_${start_time}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
 wait
 
 ################## Get training statistics ##################
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_8p.sh
index 00669260e6..955ac88c38 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_8p.sh
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_full_8p.sh
@@ -100,7 +100,7 @@ else
     mkdir -p ${test_path_dir}/train_8p_${start_time}
 fi
 
-python3.7 -u ./main_npu_8p.py \
+nohup python -u ./main_npu_8p.py \
     "${data_path}" \
     --addr=$(hostname -I |awk '{print $1}') \
     --lr=${learning_rate} \
@@ -112,7 +112,7 @@ python3.7 -u ./main_npu_8p.py \
     --world-size=1 \
     --dist-backend='hccl' \
     --multiprocessing-distributed \
-    --loss-scale=128.0 \
+    --loss-scale='dynamic' \
     --opt-level='O2' \
     --device='npu' \
     --rank=0 \
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_1p.sh
index 0d05811ca3..a874cb4e58 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_1p.sh
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_1p.sh
@@ -29,6 +29,7 @@ over_dump=False
 data_dump_flag=False
 data_dump_step="10"
 profiling=False
+#profiling=True
 
 # Help message; no modification needed
 if [[ $1 == --help || $1 == -h ]];then
@@ -58,7 +59,7 @@ done
 
 # Check that data_path is provided; no modification needed
 if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
+    echo "[Error] para \"data_path\" must be configured"
     exit 1
 fi
 
@@ -112,13 +113,13 @@ python3.7 -u ./main_npu_1p.py \
     --print-freq=10 \
     --epochs=${train_epochs} \
     --amp \
-    --loss-scale=128.0 \
+    --loss-scale='dynamic' \
     --opt-level='O2' \
+    --workers=128 \
     --device='npu' \
     --world-size=1 \
     --npu=${ASCEND_DEVICE_ID} \
     --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
 wait
 
 ################## Get training statistics ##################
diff --git a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_8p.sh
index 07b51f99d8..43fe8de45d 100644
--- a/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch/test/train_performance_8p.sh
@@ -106,14 +106,12 @@ python3.7 -u ./main_npu_8p.py \
     --world-size=1 \
     --dist-backend='hccl' \
     --multiprocessing-distributed \
-    --loss-scale=128.0 \
+    --loss-scale='dynamic' \
     --opt-level='O2' \
     --device='npu' \
     --rank=0 \
     --warm_up_epochs=5 \
     --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-
 wait
 
 ################## Get training statistics ##################
-- 
Gitee