From 2571816331a3daf83bee3a777b3d6d5f64fa5051 Mon Sep 17 00:00:00 2001
From: shenpengcheng <shenpengcheng5@huawei.com>
Date: Wed, 28 Dec 2022 09:25:36 +0000
Subject: [PATCH] fix stargan shell

---
 .../stargan/test/train_performance_1p.sh      | 130 ++++++++++++++++--
 .../stargan/test/train_performance_8p.sh      | 130 ++++++++++++++++--
 2 files changed, 236 insertions(+), 24 deletions(-)

diff --git a/PyTorch/contrib/cv/others/stargan/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/stargan/test/train_performance_1p.sh
index dd089de7b3..77daafce36 100644
--- a/PyTorch/contrib/cv/others/stargan/test/train_performance_1p.sh
+++ b/PyTorch/contrib/cv/others/stargan/test/train_performance_1p.sh
@@ -1,16 +1,122 @@
-INPUT_PATH=$1
+#!/bin/bash
 
-workdir=$(cd $(dirname $0); pwd)
+################ Basic configuration; review and adjust per model ##################
+# Required fields (must be defined here): Network batch_size RANK_SIZE
+# Network name, same as the directory name
+Network="stargan"
+# Training batch size
+batch_size=16
+# Number of NPU cards used for training
+export RANK_SIZE=1
+# Dataset path; keep empty here, no need to modify
+data_path=""
+# Number of training epochs
+train_epochs=2
+# ID of the NPU device card used for training
+device_id=0
 
-source $workdir/env_npu.sh
+# Parse --key=value command-line overrides (no changes needed per model).
+# "$@" keeps each argument intact; --data_path sets the value passed as --dataset_dir.
+for para in "$@"; do
+    case "${para}" in
+        --device_id*)  device_id="${para#*=}" ;;
+        --batch_size*) batch_size="${para#*=}" ;;
+        --data_path*)  data_path="${para#*=}" ;;
+    esac
+done
 
-if [ ! $INPUT_PATH ]; then
-    nohup python3 -u ./main.py  --mode train --folder_dir stargan_NPU_1p \
-                                         --batch_size 16 --epoch 2 --npus 1 --distributed True \
-                                         --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young &
+# Pick the device id: an ASCEND_DEVICE_ID already in the environment (dynamic allocation) wins, else use device_id; no changes needed
+if [ -n "${ASCEND_DEVICE_ID}" ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ -n "${device_id}" ];then
+    export ASCEND_DEVICE_ID="${device_id}"
+    echo "device id is ${ASCEND_DEVICE_ID}"
 else
-    nohup python3 -u ./main.py  --mode train --folder_dir stargan_NPU_1p \
-                                         --batch_size 16 --epoch 2 --npus 1 --distributed True\
-                                         --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \
-                                         --dataset_dir $INPUT_PATH &
-fi
\ No newline at end of file
+    echo "[Error] device id must be config" >&2  # needs echo: a bare string is executed as a command name
+    exit 1
+fi
+
+############### Set the directory the training script runs from ###############
+# Run from the directory that holds test/ (main.py is launched as ./main.py); test_path_dir is the path of the test folder itself
+cur_path=$(pwd)
+cur_path_last_dirname=${cur_path##*/}
+if [[ "${cur_path_last_dirname}" == "test" ]];then
+    test_path_dir=${cur_path}
+    cd .. || exit 1  # abort rather than continue from the wrong directory
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory (no changes needed) #################
+# Start each run from an empty per-device directory. rm -rf is a no-op when the
+# directory is absent, so no existence test is needed. ":?" aborts the script if
+# either variable is empty instead of letting rm -rf act on a truncated path.
+output_dir="${test_path_dir:?}/output/${ASCEND_DEVICE_ID:?}"
+rm -rf -- "${output_dir}"
+mkdir -p -- "${output_dir}"
+
+################# Launch the training script #################
+# Training start time (no changes needed)
+start_time=$(date +%s)
+# Source the NPU environment file unless the environment reports etp_running_flag=true (platform run)
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+# --dataset_dir is passed only when data_path is non-empty; an empty value would leave the flag without an argument.
+nohup python3 -u ./main.py \
+    --mode train \
+    --folder_dir stargan_NPU_1p \
+    --batch_size "${batch_size}" \
+    --epoch "${train_epochs}" \
+    --npus 1 \
+    --distributed True \
+    --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \
+    ${data_path:+--dataset_dir "${data_path}"} > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" 2>&1 &
+
+wait
+
+################## Collect training results ################
+# Training end time (no changes needed)
+end_time=$(date +%s)
+e2e_time=$(( end_time - start_time ))
+
+# Test-case info; the suffix is 'perf' (not 'acc') because this is the performance script
+BatchSize=${batch_size}
+DeviceType=$(uname -m)
+CaseName="${Network}_bs${BatchSize}_${RANK_SIZE}p_perf"
+
+# Print results (no changes needed)
+echo "------------------ Final result ------------------"
+# Mean FPS over every "FPS@all" log line (review per model); the NR guard leaves FPS empty instead of dividing by zero when nothing matched
+FPS=$(grep "FPS@all" "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk -F ' ' '{print $2}' | awk -F ',' '{print $1}' | awk '{sum+=$1} END{if (NR > 0) print sum/NR}')
+
+# Print (no changes needed)
+echo "Final Performance images/sec : $FPS"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring summary
+# Collect performance data (no changes needed)
+# Throughput
+ActualFPS=${FPS}
+# batch_size*1000/FPS per iteration; values are passed with -v so an empty FPS cannot corrupt the awk program text
+TrainingTime=$(awk -v bs="${batch_size}" -v fps="${FPS}" 'BEGIN{if (fps + 0 > 0) printf "%.2f\n", bs*1000/fps}')
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (model-specific; review per model)
+grep Epoch "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk -F 'loss' '{print $2}' | awk -F ':' '{print $2}' | awk -F ',' '{print $1}' > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt"
+# Loss of the last iteration = last line of the loss file (no changes needed)
+ActualLoss=$(awk 'END {print}' "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt")
+# Write the key metrics to ${CaseName}.log with a single truncating redirect (no changes needed)
+{
+    echo "Network = ${Network}"
+    echo "RankSize = ${RANK_SIZE}"
+    echo "BatchSize = ${BatchSize}"
+    echo "DeviceType = ${DeviceType}"
+    echo "CaseName = ${CaseName}"
+    echo "ActualFPS = ${ActualFPS}"
+    echo "TrainingTime = ${TrainingTime}"
+    echo "ActualLoss = ${ActualLoss}"
+    echo "E2ETrainingTime = ${e2e_time}"
+} > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/${CaseName}.log"
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/others/stargan/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/stargan/test/train_performance_8p.sh
index b0db6cb915..9955a4355d 100644
--- a/PyTorch/contrib/cv/others/stargan/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/others/stargan/test/train_performance_8p.sh
@@ -1,16 +1,122 @@
-INPUT_PATH=$1
+#!/bin/bash
 
-workdir=$(cd $(dirname $0); pwd)
+################ Basic configuration; review and adjust per model ##################
+# Required fields (must be defined here): Network batch_size RANK_SIZE
+# Network name, same as the directory name
+Network="stargan"
+# Training batch size
+batch_size=16
+# Number of NPU cards used for training; 8 to agree with the "--npus 8" passed to main.py
+export RANK_SIZE=8
+# Dataset path; keep empty here, no need to modify
+data_path=""
+# Number of training epochs
+train_epochs=2
+# ID of the NPU device card used for training
+device_id=0
 
-source $workdir/env_npu.sh
+# Parse --key=value command-line overrides (no changes needed per model).
+# "$@" keeps each argument intact; --data_path sets the value passed as --dataset_dir.
+for para in "$@"; do
+    case "${para}" in
+        --device_id*)  device_id="${para#*=}" ;;
+        --batch_size*) batch_size="${para#*=}" ;;
+        --data_path*)  data_path="${para#*=}" ;;
+    esac
+done
 
-if [ ! $INPUT_PATH ]; then
-    nohup python3 -u ./main.py  --mode train --folder_dir stargan_NPU_8p \
-                                         --batch_size 16 --epoch 2 --distributed True --npus 8 \
-                                         --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young &
+# Pick the device id: an ASCEND_DEVICE_ID already in the environment (dynamic allocation) wins, else use device_id; no changes needed
+if [ -n "${ASCEND_DEVICE_ID}" ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ -n "${device_id}" ];then
+    export ASCEND_DEVICE_ID="${device_id}"
+    echo "device id is ${ASCEND_DEVICE_ID}"
 else
-    nohup python3 -u ./main.py  --mode train --folder_dir stargan_NPU_8p \
-                                         --batch_size 16 --epoch 2 --distributed True --npus 8\
-                                         --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \
-                                         --dataset_dir $INPUT_PATH &
-fi
\ No newline at end of file
+    echo "[Error] device id must be config" >&2  # needs echo: a bare string is executed as a command name
+    exit 1
+fi
+
+############### Set the directory the training script runs from ###############
+# Run from the directory that holds test/ (main.py is launched as ./main.py); test_path_dir is the path of the test folder itself
+cur_path=$(pwd)
+cur_path_last_dirname=${cur_path##*/}
+if [[ "${cur_path_last_dirname}" == "test" ]];then
+    test_path_dir=${cur_path}
+    cd .. || exit 1  # abort rather than continue from the wrong directory
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory (no changes needed) #################
+# Start each run from an empty per-device directory. rm -rf is a no-op when the
+# directory is absent, so no existence test is needed. ":?" aborts the script if
+# either variable is empty instead of letting rm -rf act on a truncated path.
+output_dir="${test_path_dir:?}/output/${ASCEND_DEVICE_ID:?}"
+rm -rf -- "${output_dir}"
+mkdir -p -- "${output_dir}"
+
+################# Launch the training script #################
+# Training start time (no changes needed)
+start_time=$(date +%s)
+# Source the NPU environment file unless the environment reports etp_running_flag=true (platform run)
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+# --dataset_dir is passed only when data_path is non-empty; an empty value would leave the flag without an argument.
+nohup python3 -u ./main.py \
+    --mode train \
+    --folder_dir stargan_NPU_8p \
+    --batch_size "${batch_size}" \
+    --epoch "${train_epochs}" \
+    --distributed True \
+    --npus 8 \
+    --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \
+    ${data_path:+--dataset_dir "${data_path}"} > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" 2>&1 &
+
+wait
+
+################## Collect training results ################
+# Training end time (no changes needed)
+end_time=$(date +%s)
+e2e_time=$(( end_time - start_time ))
+
+# Test-case info; the suffix is 'perf' (not 'acc') because this is the performance script
+BatchSize=${batch_size}
+DeviceType=$(uname -m)
+CaseName="${Network}_bs${BatchSize}_${RANK_SIZE}p_perf"
+
+# Print results (no changes needed)
+echo "------------------ Final result ------------------"
+# Mean FPS over every "FPS@all" log line (review per model); the NR guard leaves FPS empty instead of dividing by zero when nothing matched
+FPS=$(grep "FPS@all" "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk -F ' ' '{print $2}' | awk -F ',' '{print $1}' | awk '{sum+=$1} END{if (NR > 0) print sum/NR}')
+
+# Print (no changes needed)
+echo "Final Performance images/sec : $FPS"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring summary
+# Collect performance data (no changes needed)
+# Throughput
+ActualFPS=${FPS}
+# batch_size*1000/FPS per iteration; values are passed with -v so an empty FPS cannot corrupt the awk program text
+TrainingTime=$(awk -v bs="${batch_size}" -v fps="${FPS}" 'BEGIN{if (fps + 0 > 0) printf "%.2f\n", bs*1000/fps}')
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (model-specific; review per model)
+grep Epoch "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk -F 'loss' '{print $2}' | awk -F ':' '{print $2}' | awk -F ',' '{print $1}' > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt"
+# Loss of the last iteration = last line of the loss file (no changes needed)
+ActualLoss=$(awk 'END {print}' "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt")
+# Write the key metrics to ${CaseName}.log with a single truncating redirect (no changes needed)
+{
+    echo "Network = ${Network}"
+    echo "RankSize = ${RANK_SIZE}"
+    echo "BatchSize = ${BatchSize}"
+    echo "DeviceType = ${DeviceType}"
+    echo "CaseName = ${CaseName}"
+    echo "ActualFPS = ${ActualFPS}"
+    echo "TrainingTime = ${TrainingTime}"
+    echo "ActualLoss = ${ActualLoss}"
+    echo "E2ETrainingTime = ${e2e_time}"
+} > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/${CaseName}.log"
\ No newline at end of file
-- 
Gitee