From 4c8a715594d52facbcbee62150897105d87e4abd Mon Sep 17 00:00:00 2001 From: Jializheng Date: Thu, 28 Jul 2022 09:09:53 +0000 Subject: [PATCH] =?UTF-8?q?=E5=8F=96=E6=B6=88=20save=5Fcheckpoint=20?= =?UTF-8?q?=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mBART_ID2372_for_PyTorch/test/train_performance_16p.sh | 4 ---- .../nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh | 2 -- .../nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh | 3 --- 3 files changed, 9 deletions(-) diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_16p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_16p.sh index 535b97995e..b9039d7334 100644 --- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_16p.sh @@ -74,8 +74,6 @@ if [[ $data_path == "" ]];then fi -sed -i "s|checkpoint_utils.save_checkpoint(|#checkpoint_utils.save_checkpoint(|g" $cur_path/../fairseq_cli/train.py - export HCCL_IF_IP=$fix_node_ip export MASTER_ADDR=$one_node_ip export MASTER_PORT=29688 @@ -195,5 +193,3 @@ echo "ActualFPS = ${ActualWPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log - -sed -i "s|#checkpoint_utils.save_checkpoint(|checkpoint_utils.save_checkpoint(|g" $cur_path/../fairseq_cli/train.py diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh index d4a0781b2a..c49ed97ca1 100644 --- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_1p.sh @@ -89,7 +89,6 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -sed -i "s|checkpoint_utils.save_checkpoint(|#checkpoint_utils.save_checkpoint(|g" $cur_path/fairseq_cli/train.py #创建DeviceID输出目录,不需要修改 if [ -d $cur_path/test/output ];then rm -rf $cur_path/test/output/* @@ -123,7 +122,6 @@ wait end=$(date +%s) e2e_time=$(( $end - $start )) -sed -i "s|#checkpoint_utils.save_checkpoint(|checkpoint_utils.save_checkpoint(|g" $cur_path/fairseq_cli/train.py #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh index 59bd2fdf09..efb8421051 100644 --- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/train_performance_8p.sh @@ -71,7 +71,6 @@ if [[ $data_path == "" ]];then exit 1 fi -sed -i "s|checkpoint_utils.save_checkpoint(|#checkpoint_utils.save_checkpoint(|g" $cur_path/fairseq_cli/train.py ##################创建日志输出目录,根据模型审视################## # 模型采用非循环方式启动多卡训练,创建日志输出目录如下;采用循环方式启动多卡训练的模型,在循环中创建日志输出目录,可参考CRNN模型 # 非循环方式下8卡训练日志输出路径中的ASCEND_DEVICE_ID默认为0,只是人为指定文件夹名称, 不涉及训练业务 @@ -195,5 +194,3 @@ echo "ActualFPS = ${ActualWPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${C echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log - -sed -i "s|#checkpoint_utils.save_checkpoint(|checkpoint_utils.save_checkpoint(|g" $cur_path/fairseq_cli/train.py \ No newline at end of file -- Gitee