From a971f6667aa0d608e50903310719ee70b70e9637 Mon Sep 17 00:00:00 2001 From: bailang Date: Thu, 31 Mar 2022 10:37:55 +0800 Subject: [PATCH 1/9] =?UTF-8?q?=E4=BF=AE=E6=94=B9openpose=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E8=AE=AD=E7=BB=83=E8=84=9A=E6=9C=AC=20Signed-off-by:?= =?UTF-8?q?=20bailang=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Lightweight_OpenPose/test/train_full_8p.sh | 12 +++++++++++- .../test/train_performance_8p.sh | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh index bfede3f4fc..9f09cdc851 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh @@ -98,7 +98,16 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7.5 train.py \ +RANK_ID_START=0 +RANK_SIZE=8 + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + +KERNEL_NUM=$(($(nproc)/8)) +PID_START=$((KERNEL_NUM * RANK_ID)) +PID_END=$((PID_START + KERNEL_NUM - 1)) +taskset -c $PID_START-$PID_END python3.7.5 train.py \ --train-images-folder ${data_path}/train2017/ \ --prepared-train-labels ./prepared_train_annotation.pkl \ --val-labels ./val_subset.json \ @@ -118,6 +127,7 @@ python3.7.5 train.py \ --world-size=1 \ --dist-backend 'hccl' \ --amp \ + --gpu=${RANK_ID} \ --loss-scale=16 \ --opt-level O1 \ --device-list '0,1,2,3,4,5,6,7' \ diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh index 09d08597d8..55b61fce77 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh @@ -68,7 +68,16 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7.5 train.py \ +RANK_ID_START=0 +RANK_SIZE=8 + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + +KERNEL_NUM=$(($(nproc)/8)) +PID_START=$((KERNEL_NUM * RANK_ID)) +PID_END=$((PID_START + KERNEL_NUM - 1)) +taskset -c $PID_START-$PID_END python3.7.5 train.py \ --train-images-folder ${data_path}/train2017/ \ --prepared-train-labels ./prepared_train_annotation.pkl \ --val-labels ./val_subset.json \ @@ -89,6 +98,7 @@ python3.7.5 train.py \ --dist-backend 'hccl' \ --loss-scale=16 \ --amp \ + --gpu=${RANK_ID} \ --opt-level O1 \ --device-list '0,1,2,3,4,5,6,7' \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -- Gitee From 5de8cebb04a7ae34ec3f9ba01ebbba8504594916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 06:44:40 +0000 Subject: [PATCH 2/9] update PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py. --- .../contrib/cv/pose_estimation/Lightweight_OpenPose/train.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py index ce68710a1c..1340bf3f4a 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py @@ -161,10 +161,7 @@ def main(): args.world_size = ngpus_per_node * args.world_size args.distributed = args.world_size > 1 - if args.distributed: - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - else: - main_worker(args.gpu, ngpus_per_node, args) + main_worker(args.gpu, ngpus_per_node, args) def main_worker(gpu, ngpus_per_node, args): -- Gitee From adc26de2db7e0274800c9743bbbd73e0725bf3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 07:17:15 +0000 Subject: [PATCH 3/9] update PyTorch/contrib/audio/WaveGlow/requirement.txt. --- PyTorch/contrib/audio/WaveGlow/requirement.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/audio/WaveGlow/requirement.txt b/PyTorch/contrib/audio/WaveGlow/requirement.txt index e85be42a1f..45e6410de9 100644 --- a/PyTorch/contrib/audio/WaveGlow/requirement.txt +++ b/PyTorch/contrib/audio/WaveGlow/requirement.txt @@ -1,6 +1,6 @@ matplotlib==3.5.1 numpy==1.20.3 -inflect==5.3.1 +inflect==5.3.0 scipy==1.7.3 Unidecode==1.3.2 Pillow==8.4.0 -- Gitee From 0c9ee11896ba32ca4d4d5407d189e9fa53c739a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:00:23 +0000 Subject: [PATCH 4/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh. --- PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh index 338b8dd79b..8132b286e3 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh @@ -60,7 +60,7 @@ etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then source ${cur_path}/test/env_npu.sh fi -nohup python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py $MODEL --show --out results_solo.pkl --eval segm \ +nohup python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py $MODEL --show --out results_solo.pkl --eval segm \ --data_root=$data_path > ${cur_path}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & wait -- Gitee From d6b0058922318a84cb4dbe443e476ff2bcdacee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:01:02 +0000 Subject: [PATCH 5/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh. --- PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh index dd5f93484f..648019d58d 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh @@ -67,11 +67,11 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ --data_root=$data_path --gpu-ids 0 --resume_from work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth \ --fine-tune > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -#python tools/test_ins.py configs/solo/solo_r50_fpn_8gpu_1x.py work_dirs/solo_release_r50_fpn_8gpu_1x/latest.pth --show \ +#python3.7 tools/test_ins.py configs/solo/solo_r50_fpn_8gpu_1x.py work_dirs/solo_release_r50_fpn_8gpu_1x/latest.pth --show \ # --out results_solo.pkl --eval segm >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #wait -- Gitee From 4711d636e305146a666b89eaa88d6f0b03c6a55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:01:35 +0000 Subject: [PATCH 6/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh. --- PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh index b44b9a5126..b4be544856 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh @@ -67,10 +67,10 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 12 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 12 \ --data_root=$data_path --gpu-ids 0 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -- Gitee From fbe0a6118a939b5d75b56627bb9738fe625d5371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:02:09 +0000 Subject: [PATCH 7/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh. --- PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh index 08dbdd7141..00f40316f4 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh @@ -96,7 +96,7 @@ do fi done wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -- Gitee From fc9acce0df1b6c588e5d90d3153880442fb0af3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:02:39 +0000 Subject: [PATCH 8/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh. --- .../contrib/cv/detection/SOLOv2/test/train_performance_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh index 04d54a8554..9cb99d7d0b 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh @@ -67,10 +67,10 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ --data_root=$data_path --gpu-ids 0 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -- Gitee From 3d0bdb6ef4900fc9c9ae3716343854fd3b75beae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E6=B5=AA?= Date: Thu, 31 Mar 2022 08:03:15 +0000 Subject: [PATCH 9/9] update PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh. --- .../contrib/cv/detection/SOLOv2/test/train_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh index 83aa6d37c4..8e00d0e5da 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh @@ -96,7 +96,7 @@ do fi done wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait #训练结束时间,不需要修改 -- Gitee