diff --git a/PyTorch/contrib/audio/WaveGlow/requirement.txt b/PyTorch/contrib/audio/WaveGlow/requirement.txt index e85be42a1f454321c5792290abefb6fa7504ea5a..45e6410de9f7e59bd47cf43064a235e257c2ed82 100644 --- a/PyTorch/contrib/audio/WaveGlow/requirement.txt +++ b/PyTorch/contrib/audio/WaveGlow/requirement.txt @@ -1,6 +1,6 @@ matplotlib==3.5.1 numpy==1.20.3 -inflect==5.3.1 +inflect==5.3.0 scipy==1.7.3 Unidecode==1.3.2 Pillow==8.4.0 diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh index 338b8dd79b8c7a3d4fa7a7f492073b2a95557a06..8132b286e3e8c50a53cde219bb12f297c1288f03 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_eval_1p.sh @@ -60,7 +60,7 @@ etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then source ${cur_path}/test/env_npu.sh fi -nohup python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py $MODEL --show --out results_solo.pkl --eval segm \ +nohup python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py $MODEL --show --out results_solo.pkl --eval segm \ --data_root=$data_path > ${cur_path}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh index dd5f93484f461d75293ab1c754926acf7404e9f5..648019d58da2ba2ad3f4e89c1f4df2e5f8d05035 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_finetune_1p.sh @@ -67,11 +67,11 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ --data_root=$data_path --gpu-ids 0 --resume_from work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth \ --fine-tune > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -#python tools/test_ins.py configs/solo/solo_r50_fpn_8gpu_1x.py work_dirs/solo_release_r50_fpn_8gpu_1x/latest.pth --show \ +#python3.7 tools/test_ins.py configs/solo/solo_r50_fpn_8gpu_1x.py work_dirs/solo_release_r50_fpn_8gpu_1x/latest.pth --show \ # --out results_solo.pkl --eval segm >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #wait diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh index b44b9a512619d2987862a0d487d1887e5904f609..b4be5448561ecc86dbfde2a078eba4c9464dc3f0 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_1p.sh @@ -67,10 +67,10 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 12 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 12 \ --data_root=$data_path --gpu-ids 0 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh index 08dbdd71413ace8c56f4554d41c5f9625f23d7ce..00f40316f47639de8d8d34704b4682d68ab3eb36 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_full_8p.sh @@ -96,7 +96,7 @@ do fi done wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh index 04d54a85542eba8a1ded47d06a4d09632aa21be2..9cb99d7d0ba0c44b6e3f7128c45f2c7b8f36451f 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_1p.sh @@ -67,10 +67,10 @@ fi #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 export NPUID=0 export RANK=0 -python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ +python3.7 tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py --opt-level $apex --autoscale-lr --seed 0 --total_epochs 1 \ --data_root=$data_path --gpu-ids 0 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh index 83aa6d37c47ed7f1f77aa208c2d973bd0b004c6f..8e00d0e5da88b6513f836e4a4cc53f2ef82d13c8 100644 --- a/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/SOLOv2/test/train_performance_8p.sh @@ -96,7 +96,7 @@ do fi done wait -python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ +python3.7 tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show \ --out results_solo.pkl --eval segm --data_root=$data_path >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait #训练结束时间,不需要修改 diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh index bfede3f4fc330ffbc19d4dfe1d383424e44ab32e..9f09cdc851337bbebdbc31ec36c6e1c0b06d144d 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_full_8p.sh @@ -98,7 +98,16 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7.5 train.py \ +RANK_ID_START=0 +RANK_SIZE=8 + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + +KERNEL_NUM=$(($(nproc)/8)) +PID_START=$((KERNEL_NUM * RANK_ID)) +PID_END=$((PID_START + KERNEL_NUM - 1)) +taskset -c $PID_START-$PID_END python3.7.5 train.py \ --train-images-folder ${data_path}/train2017/ \ --prepared-train-labels ./prepared_train_annotation.pkl \ --val-labels ./val_subset.json \ @@ -118,6 +127,7 @@ python3.7.5 train.py \ --world-size=1 \ --dist-backend 'hccl' \ --amp \ + --gpu=${RANK_ID} \ --loss-scale=16 \ --opt-level O1 \ --device-list '0,1,2,3,4,5,6,7' \ diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh index 09d08597d8ac84dc13e0ae2045bc6a0fb92776cb..55b61fce77f425674c68c0dbd718c30fd676c604 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/test/train_performance_8p.sh @@ -68,7 +68,16 @@ if [ x"${etp_flag}" != x"true" ];then source ${test_path_dir}/env_npu.sh fi -python3.7.5 train.py \ +RANK_ID_START=0 +RANK_SIZE=8 + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + +KERNEL_NUM=$(($(nproc)/8)) +PID_START=$((KERNEL_NUM * RANK_ID)) +PID_END=$((PID_START + KERNEL_NUM - 1)) +taskset -c $PID_START-$PID_END python3.7.5 train.py \ --train-images-folder ${data_path}/train2017/ \ --prepared-train-labels ./prepared_train_annotation.pkl \ --val-labels ./val_subset.json \ @@ -89,6 +98,7 @@ python3.7.5 train.py \ --dist-backend 'hccl' \ --loss-scale=16 \ --amp \ + --gpu=${RANK_ID} \ --opt-level O1 \ --device-list '0,1,2,3,4,5,6,7' \ > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py index ce68710a1cd716f6e7032ab7c1d338d497db105e..1340bf3f4a34ed90c502c628e20acba8f2d428d0 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/train.py @@ -161,10 +161,7 @@ def main(): args.world_size = ngpus_per_node * args.world_size args.distributed = args.world_size > 1 - if args.distributed: - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - else: - main_worker(args.gpu, ngpus_per_node, args) + main_worker(args.gpu, ngpus_per_node, args) def main_worker(gpu, ngpus_per_node, args):