diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index 695f9de34e2fa641aea579dc49504ab9e4d42a5c..2932965ffd972a5c05a597dc18c64fbe6751d15b 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -181,10 +181,14 @@ parser.add_argument('-t', '--fine-tuning', action='store_true',
                     help='transfer learning + fine tuning - train only the last FC layer.')
 
+# graph mode
+parser.add_argument('--graph_mode',
+                    action='store_true',
+                    help='whether to enable graph mode.')
 best_acc1 = 0
-
+args = parser.parse_args()
 
 def main():
-    args = parser.parse_args()
+
     if args.npu is None:
         args.npu = 0
     global CALCULATE_DEVICE
@@ -428,6 +432,11 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
     optimizer.zero_grad()
     end = time.time()
     for i, (images, target) in enumerate(train_loader):
+        # graph mode
+        if args.graph_mode:
+            print("args.graph_mode")
+            torch.npu.enable_graph_mode()
+
         if i > 100:
             pass
         # measure data loading time
@@ -438,20 +447,34 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
             images = images.to(CALCULATE_DEVICE, non_blocking=True)
 
         if args.label_smoothing == 0.0:
-            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
+            # graph mode
+            if args.graph_mode:
+                print("args.graph_mode")
+                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
+            else:
+                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
         # compute output
         output = model(images)
         loss = criterion(output, target)
 
         if args.label_smoothing > 0.0:
-            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+            # graph mode
+            if args.graph_mode:
+                print("args.graph_mode")
+                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
+            else:
+                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+
+
         # measure accuracy and record loss
-        acc1, acc5 = accuracy(output, target, topk=(1, 5))
-        losses.update(loss.item(), images.size(0))
-        top1.update(acc1[0], images.size(0))
-        top5.update(acc5[0], images.size(0))
+        # graph mode
+        if not args.graph_mode:
+            # print("args.graph_mode====================")
+            acc1, acc5 = accuracy(output, target, topk=(1, 5))
+            losses.update(loss.item(), images.size(0))
+            top1.update(acc1[0], images.size(0))
+            top5.update(acc5[0], images.size(0))
 
         # compute gradient and do SGD step
         with amp.scale_loss(loss, optimizer) as scaled_loss:
@@ -464,6 +487,13 @@
                     param.grad /= batch_size_multiplier
             optimizer.step()
             optimizer.zero_grad()
+
+        # graph mode
+        if args.graph_mode:
+            print("args.graph_mode")
+            torch.npu.launch_graph()
+            if i == 100:
+                torch.npu.synchronize()
 
         # measure elapsed time
         batch_time.update(time.time() - end)
@@ -474,6 +504,10 @@
         if i == TRAIN_STEP:
             break
 
+    # graph mode
+    if args.graph_mode:
+        print("args.graph_mode")
+        torch.npu.disable_graph_mode()
     print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg),
           '* FPS@all {:.3f}'.format(
               args.batch_size/batch_time.avg))
@@ -615,12 +649,20 @@ class LabelSmoothing(nn.Module):
         self.smoothing = smoothing
 
     def forward(self, x, target):
-        logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
+        # graph mode
+        if args.graph_mode:
+            logprobs = torch.nn.functional.log_softmax(x, dim=-1)
+        else:
+            logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
         nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
         nll_loss = nll_loss.squeeze(1)
         smooth_loss = -logprobs.mean(dim=-1)
         loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-        return loss.mean().to(CALCULATE_DEVICE)
+        # graph mode
+        if args.graph_mode:
+            return loss.mean()
+        else:
+            return loss.mean().to(CALCULATE_DEVICE)
 
 def lr_policy(lr_fn, logger=None):
     if logger is not None:
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f2f584cd464fb0c93a5c9026a6bbd8b082b811f6
--- /dev/null
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+
+################ Basic configuration parameters; review and modify per model ##################
+# Required fields (must be defined here): Network batch_size RANK_SIZE
+# Network name, same as the directory name
+Network="ResNet50_ID3071_for_PyTorch"
+# Training batch_size
+batch_size=512
+# Number of NPU devices used for training
+export RANK_SIZE=1
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Training epochs (90 for full training)
+train_epochs=1
+# NPU device id used for training
+device_id=0
+# Number of data-loading workers
+workers=64
+
+# Parameter validation: data_path is required; other parameters may be added or removed per model.
+# Any parameter added here must be defined and assigned above.
+for para in $*
+do
+    if [[ $para == --device_id* ]];then
+        device_id=`echo ${para#*=}`
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+
+# Check that data_path was passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+# Check that a device id is available, either dynamically assigned or manually specified; no modification needed
+if [ $ASCEND_DEVICE_ID ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ ${device_id} ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be configured"
+    exit 1
+fi
+
+
+
+############### Resolve the training-script execution path ###############
+# cd to the directory at the same level as the test folder before running, for compatibility;
+# test_path_dir is the path that contains the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+# Modify the parameter (pass -> break) so training stops after the profiled steps
+sed -i "s|pass|break|g" ${test_path_dir}/../pytorch_resnet50_apex.py
+wait
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
+
+python3.7 ./pytorch_resnet50_apex.py \
+    --data ${data_path} \
+    --npu ${ASCEND_DEVICE_ID} \
+    -j ${workers} \
+    -b ${batch_size} \
+    --lr 0.2 \
+    --warmup 5 \
+    --label-smoothing=0.0 \
+    --epochs ${train_epochs} \
+    --graph_mode \
+    --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+
+wait
+
+
+################## Collect training results ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+
+# Restore the parameter (break -> pass)
+sed -i "s|break|pass|g" ${test_path_dir}/../pytorch_resnet50_apex.py
+wait
+
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and modify per model
+grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log
+FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring summary
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
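
For readers unfamiliar with the graph-mode flow this patch wires in, the sketch below condenses the pattern: capture the per-iteration ops after `torch.npu.enable_graph_mode()`, replay them with `torch.npu.launch_graph()`, and tear everything down with `torch.npu.disable_graph_mode()`. It is a minimal illustration under stated assumptions, not the patched script: it assumes the Ascend `torch_npu` plugin that provides these `torch.npu` calls is installed, and the `train_one_epoch` name, the `"npu"` device string, and the plain SGD step (no Apex loss scaling) are placeholders for this example only.

```python
import torch  # assumes the Ascend torch_npu plugin has been imported/patched in


def train_one_epoch(train_loader, model, criterion, optimizer, graph_mode=False):
    """Minimal sketch of the graph-mode loop used in pytorch_resnet50_apex.py."""
    for i, (images, target) in enumerate(train_loader):
        if graph_mode:
            # Start capturing subsequent NPU ops into a graph instead of
            # dispatching them eagerly (mirrors the patch).
            torch.npu.enable_graph_mode()

        images = images.to("npu", non_blocking=True)
        # In graph mode the patch casts on device, so the cast stays inside the graph.
        target = target.to("npu", non_blocking=True).to(torch.int32)

        output = model(images)
        loss = criterion(output, target)
        loss.backward()          # the real script wraps this in apex amp.scale_loss
        optimizer.step()
        optimizer.zero_grad()

        if graph_mode:
            # Execute the captured graph for this iteration.
            torch.npu.launch_graph()
            if i == 100:
                # Occasional device sync so host-side timers stay meaningful.
                torch.npu.synchronize()

    if graph_mode:
        torch.npu.disable_graph_mode()
```

Note that, as in the patch, accuracy and loss bookkeeping is skipped in graph mode: calling `loss.item()` every step would force a host-device synchronization and largely defeat the purpose of graph capture, which is also why the performance test script above runs with `--graph_mode` and reports only FPS.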