From 74e1dec2b6b850bcd30069f302513f7ef5ebb772 Mon Sep 17 00:00:00 2001
From: libaokui <libaokui@huawei.com>
Date: Sat, 14 Jun 2025 16:42:42 +0800
Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E4=BA=8Emodelzoo=E6=8F=90=E4=BA=A4?=
 =?UTF-8?q?=E8=A7=84=E5=88=99=E5=AE=8C=E5=96=84=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/rl/VeRL_for_PyTorch/README.md    |  20 +-
 .../test/runtime_env_32b.yaml                 |   5 -
 ...rain_qwen2_5_32b_instruct_DAPO_full_32p.sh | 251 +++++++++++++++++
 ...en2_5_32b_instruct_DAPO_performance_32p.sh | 124 ++++++++-
 ...train_qwen2_5_7b_instruct_DAPO_full_16p.sh | 253 ++++++++++++++++++
 ...wen2_5_7b_instruct_DAPO_performance_16p.sh | 123 ++++++++-
 6 files changed, 753 insertions(+), 23 deletions(-)
 delete mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml
 create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh
 create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh

diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md
index 0243527a24..a3d5acd1ad 100644
--- a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md
+++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md
@@ -265,17 +265,31 @@ verl‌是一个集SFT（监督学习）与RL（强化学习）于一体的灵
         - 单机16卡训练
 
             ```shell
-            bash test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh
+            bash test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh --data_path=xxx --model_path=xxx   # 16卡训练
             ```
-
+        
+        - 单机16卡性能
+        
+            ```shell
+            bash test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh --data_path=xxx --model_path=xxx   # 16卡性能
+            ```
+        
         `Qwen2.5-32B-Instruct`模型支持双机32卡训练。
 
         - 双机32卡训练
         
             ```shell
             # 主节点执行
-            bash test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh
+            bash test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh --data_path=xxx --model_path=xxx   # 32卡训练
             ```
+        
+        - 双机32卡性能
+        
+            ```shell
+            # 主节点执行
+            bash test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh --data_path=xxx --model_path=xxx   # 32卡性能
+            ```
+
 # 训练结果展示
 
 **表 2**  训练结果展示表
diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml
deleted file mode 100644
index c3d76cfb0a..0000000000
--- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-working_dir: ./
-excludes: ["/.git/"]
-env_vars:
-  HCCL_CONNECT_TIMEOUT: "1500"
-  HCCL_EXEC_TIMEOUT: "1500"
\ No newline at end of file
diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh
new file mode 100644
index 0000000000..5cf477b4a2
--- /dev/null
+++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh
@@ -0,0 +1,251 @@
+#!/bin/bash
+
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+model_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Qwen2_5_32b_instruct_dapo_for_PyTorch"
+
+# 帮助信息，不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo"usage:./test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path		           source data of training
+    --model_path		         model path for GRPO
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+#参数校验，不需要修改
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --model_path* ]];then
+        model_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be confing"
+    exit 1
+fi
+
+#非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录，需要模型审视修改
+cd $cur_path
+
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output
+    mkdir -p ${test_path_dir}/output
+else
+    mkdir -p ${test_path_dir}/output
+fi
+
+# 训练配置
+project_name='DAPO'
+exp_name='DAPO-Qwen2.5-32B-Instruct'
+
+adv_estimator=grpo
+
+use_kl_in_reward=False
+kl_coef=0.0
+use_kl_loss=False
+kl_loss_coef=0.0
+
+clip_ratio_low=0.2
+clip_ratio_high=0.28
+
+max_prompt_length=$((1024 * 2))
+max_response_length=$((1024 * 20))
+enable_overlong_buffer=True
+overlong_buffer_len=$((1024 * 4))
+overlong_penalty_factor=1.0
+
+loss_agg_mode="token-mean"
+
+enable_filter_groups=False
+filter_groups_metric=acc
+max_num_gen_batches=10
+train_prompt_bsz=16
+gen_prompt_bsz=$((train_prompt_bsz * 2))
+n_resp_per_prompt=16
+train_prompt_mini_bsz=1
+
+# Ray
+RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
+WORKING_DIR=${WORKING_DIR:-"${PWD}"}
+RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
+NNODES=${NNODES:-2}
+# Paths
+RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
+CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
+TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"}
+TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"}
+
+# Algorithm
+temperature=1.0
+top_p=1.0
+top_k=-1 # 0 for HF rollout, -1 for vLLM rollout
+
+# Performance Related Parameter
+sp_size=8
+use_dynamic_bsz=True
+actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
+infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
+offload=True
+gen_tp=4
+
+nohup ray job submit --runtime-env="${RUNTIME_ENV}" \
+    --working-dir "${WORKING_DIR}" \
+    -- python3 -m recipe.dapo.src.main_dapo \
+    data.train_files="${TRAIN_FILE}" \
+    data.val_files="${TEST_FILE}" \
+    data.prompt_key=prompt \
+    data.truncation='left' \
+    data.max_prompt_length=${max_prompt_length} \
+    data.max_response_length=${max_response_length} \
+    data.gen_batch_size=${gen_prompt_bsz} \
+    data.train_batch_size=${train_prompt_bsz} \
+    actor_rollout_ref.rollout.n=${n_resp_per_prompt} \
+    algorithm.adv_estimator=${adv_estimator} \
+    algorithm.use_kl_in_reward=${use_kl_in_reward} \
+    algorithm.kl_ctrl.kl_coef=${kl_coef} \
+    actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
+    actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \
+    actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \
+    actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \
+    actor_rollout_ref.actor.clip_ratio_c=10.0 \
+    algorithm.filter_groups.enable=${enable_filter_groups} \
+    algorithm.filter_groups.max_num_gen_batches=${max_num_gen_batches} \
+    algorithm.filter_groups.metric=${filter_groups_metric} \
+    actor_rollout_ref.model.use_remove_padding=True \
+    actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    +actor_rollout_ref.model.override_config.attention_dropout=0. \
+    +actor_rollout_ref.model.override_config.embd_pdrop=0. \
+    +actor_rollout_ref.model.override_config.resid_pdrop=0. \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    actor_rollout_ref.actor.optim.lr=1e-6 \
+    actor_rollout_ref.actor.optim.lr_warmup_steps=10 \
+    actor_rollout_ref.actor.optim.weight_decay=0.1 \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \
+    actor_rollout_ref.actor.fsdp_config.param_offload=${offload} \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} \
+    actor_rollout_ref.actor.entropy_coeff=0 \
+    actor_rollout_ref.actor.grad_clip=1.0 \
+    actor_rollout_ref.actor.loss_agg_mode=${loss_agg_mode} \
+    actor_rollout_ref.actor.ulysses_sequence_parallel_size=${sp_size} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.50 \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \
+    actor_rollout_ref.rollout.enable_chunked_prefill=True \
+    actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \
+    actor_rollout_ref.rollout.temperature=${temperature} \
+    actor_rollout_ref.rollout.top_p=${top_p} \
+    actor_rollout_ref.rollout.top_k="${top_k}" \
+    actor_rollout_ref.rollout.val_kwargs.temperature=${temperature} \
+    actor_rollout_ref.rollout.val_kwargs.top_p=${top_p} \
+    actor_rollout_ref.rollout.val_kwargs.top_k=${top_k} \
+    actor_rollout_ref.rollout.val_kwargs.do_sample=True \
+    actor_rollout_ref.rollout.val_kwargs.n=1 \
+    actor_rollout_ref.ref.fsdp_config.param_offload=${offload} \
+    actor_rollout_ref.ref.ulysses_sequence_parallel_size=${sp_size} \
+    actor_rollout_ref.actor.fsdp_config.fsdp_size=-1 \
+    reward_model.reward_manager=dapo \
+    reward_model.overlong_buffer.enable=${enable_overlong_buffer} \
+    reward_model.overlong_buffer.len=${overlong_buffer_len} \
+    reward_model.overlong_buffer.penalty_factor=${overlong_penalty_factor} \
+    trainer.logger=['console'] \
+    trainer.project_name="${project_name}" \
+    trainer.experiment_name="${exp_name}" \
+    trainer.n_gpus_per_node=16 \
+    trainer.nnodes="${NNODES}" \
+    trainer.val_before_train=False \
+    trainer.test_freq=5 \
+    trainer.save_freq=-1 \
+    trainer.total_epochs=1 \
+    trainer.total_training_steps=100 \
+    trainer.default_local_dir="${CKPTS_DIR}" \
+    trainer.resume_mode=auto \
+    data.shuffle=False \
+    actor_rollout_ref.actor.use_torch_compile=False \
+    actor_rollout_ref.ref.use_torch_compile=False \
+    actor_rollout_ref.actor.entropy_checkpointing=True \
+    actor_rollout_ref.ref.entropy_checkpointing=True \
+    actor_rollout_ref.actor.fsdp_config.forward_prefetch=True \
+    actor_rollout_ref.ref.fsdp_config.forward_prefetch=True \
+    actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \
+    actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \
+    actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \
+    actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_dapo_full.log 2>&1 &
+
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_dapo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'`
+
+#排除功能问题导致计算溢出的异常，增加健壮性
+if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then
+    FPS=""
+fi
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#打印，不需要修改
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+DeviceType=`uname -m`
+CaseName=${Network}_'32p'_'full'
+
+##获取性能数据，不需要修改
+#吞吐量
+ActualFPS=${FPS}
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log
diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh
index 8df492ae97..457a060dce 100644
--- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh
+++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh
@@ -1,6 +1,78 @@
-#!/usr/bin/env bash
-set -euxo pipefail
+#!/bin/bash
 
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+model_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Qwen2_5_32b_instruct_dapo_for_PyTorch"
+
+# 帮助信息，不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo"usage:./test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path		           source data of training
+    --model_path		         model path for GRPO
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+#参数校验，不需要修改
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --model_path* ]];then
+        model_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be confing"
+    exit 1
+fi
+
+#非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录，需要模型审视修改
+cd $cur_path
+
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output
+    mkdir -p ${test_path_dir}/output
+else
+    mkdir -p ${test_path_dir}/output
+fi
+
+# 训练配置
 project_name='DAPO'
 exp_name='DAPO-Qwen2.5-32B-Instruct'
 
@@ -33,14 +105,14 @@ train_prompt_mini_bsz=1
 # Ray
 RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
 WORKING_DIR=${WORKING_DIR:-"${PWD}"}
-RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env_32b.yaml"}
+RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
 NNODES=${NNODES:-2}
 # Paths
 RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
-MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/Qwen2.5-32B-Instruct"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
 CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
-TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/DAPO-Math-17k/data/dapo-math-17k.parquet"}
-TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/AIME-2024/data/aime-2024.parquet"}
+TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"}
+TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"}
 
 # Algorithm
 temperature=1.0
@@ -55,7 +127,7 @@ infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
 offload=True
 gen_tp=4
 
-ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
+nohup ray job submit --runtime-env="${RUNTIME_ENV}" \
     --working-dir "${WORKING_DIR}" \
     -- python3 -m recipe.dapo.src.main_dapo \
     data.train_files="${TRAIN_FILE}" \
@@ -128,6 +200,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
     trainer.test_freq=5 \
     trainer.save_freq=-1 \
     trainer.total_epochs=1 \
+    trainer.total_training_steps=10 \
     trainer.default_local_dir="${CKPTS_DIR}" \
     trainer.resume_mode=auto \
     data.shuffle=False \
@@ -140,4 +213,39 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
     actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \
     actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \
     actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \
-    actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True
\ No newline at end of file
+    actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_dapo_perf.log 2>&1 &
+
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_dapo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'`
+
+#排除功能问题导致计算溢出的异常，增加健壮性
+if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then
+    FPS=""
+fi
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#打印，不需要修改
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+DeviceType=`uname -m`
+CaseName=${Network}_'32p'_'perf'
+
+##获取性能数据，不需要修改
+#吞吐量
+ActualFPS=${FPS}
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log
diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh
new file mode 100644
index 0000000000..a4a084f3d0
--- /dev/null
+++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+model_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Qwen2_5_7b_instruct_dapo_for_PyTorch"
+
+# 帮助信息，不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo"usage:./test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path		           source data of training
+    --model_path		         model path for GRPO
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+#参数校验，不需要修改
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --model_path* ]];then
+        model_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be confing"
+    exit 1
+fi
+
+#非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录，需要模型审视修改
+cd $cur_path
+
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output
+    mkdir -p ${test_path_dir}/output
+else
+    mkdir -p ${test_path_dir}/output
+fi
+
+# 训练配置
+project_name='DAPO'
+exp_name='DAPO-Qwen2.5-7B-Instruct'
+
+adv_estimator=grpo
+
+use_kl_in_reward=False
+kl_coef=0.0
+use_kl_loss=False
+kl_loss_coef=0.0
+
+clip_ratio_low=0.2
+clip_ratio_high=0.28
+
+max_prompt_length=$((1024 * 2))
+max_response_length=$((1024 * 20))
+enable_overlong_buffer=True
+overlong_buffer_len=$((1024 * 4))
+overlong_penalty_factor=1.0
+
+loss_agg_mode="token-mean"
+
+enable_filter_groups=False
+filter_groups_metric=acc
+max_num_gen_batches=10
+train_prompt_bsz=16
+gen_prompt_bsz=$((train_prompt_bsz * 3))
+n_resp_per_prompt=16
+train_prompt_mini_bsz=1
+
+# Ray
+RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
+WORKING_DIR=${WORKING_DIR:-"${PWD}"}
+RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
+NNODES=${NNODES:-1}
+# Paths
+RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
+CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
+TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"}
+TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"}
+
+# Algorithm
+temperature=1.0
+top_p=1.0
+top_k=-1 # 0 for HF rollout, -1 for vLLM rollout
+
+# Performance Related Parameter
+sp_size=4
+use_dynamic_bsz=True
+actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
+infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
+offload=True
+gen_tp=1
+
+nohup ray job submit --runtime-env="${RUNTIME_ENV}" \
+    --working-dir "${WORKING_DIR}" \
+    -- python3 -m recipe.dapo.src.main_dapo \
+    data.train_files="${TRAIN_FILE}" \
+    data.val_files="${TEST_FILE}" \
+    data.prompt_key=prompt \
+    data.truncation='left' \
+    data.max_prompt_length=${max_prompt_length} \
+    data.max_response_length=${max_response_length} \
+    data.gen_batch_size=${gen_prompt_bsz} \
+    data.train_batch_size=${train_prompt_bsz} \
+    actor_rollout_ref.rollout.n=${n_resp_per_prompt} \
+    algorithm.adv_estimator=${adv_estimator} \
+    algorithm.use_kl_in_reward=${use_kl_in_reward} \
+    algorithm.kl_ctrl.kl_coef=${kl_coef} \
+    actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
+    actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \
+    actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \
+    actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \
+    actor_rollout_ref.actor.clip_ratio_c=10.0 \
+    algorithm.filter_groups.enable=${enable_filter_groups} \
+    algorithm.filter_groups.max_num_gen_batches=${max_num_gen_batches} \
+    algorithm.filter_groups.metric=${filter_groups_metric} \
+    actor_rollout_ref.model.use_remove_padding=True \
+    actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    actor_rollout_ref.model.path="${MODEL_PATH}" \
+    +actor_rollout_ref.model.override_config.attention_dropout=0. \
+    +actor_rollout_ref.model.override_config.embd_pdrop=0. \
+    +actor_rollout_ref.model.override_config.resid_pdrop=0. \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    actor_rollout_ref.actor.optim.lr=1e-6 \
+    actor_rollout_ref.actor.optim.lr_warmup_steps=10 \
+    actor_rollout_ref.actor.optim.weight_decay=0.1 \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \
+    actor_rollout_ref.actor.fsdp_config.param_offload=${offload} \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} \
+    actor_rollout_ref.actor.entropy_coeff=0 \
+    actor_rollout_ref.actor.grad_clip=1.0 \
+    actor_rollout_ref.actor.loss_agg_mode=${loss_agg_mode} \
+    actor_rollout_ref.actor.ulysses_sequence_parallel_size=${sp_size} \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.50 \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \
+    actor_rollout_ref.rollout.enable_chunked_prefill=True \
+    actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \
+    actor_rollout_ref.rollout.temperature=${temperature} \
+    actor_rollout_ref.rollout.top_p=${top_p} \
+    actor_rollout_ref.rollout.top_k="${top_k}" \
+    actor_rollout_ref.rollout.val_kwargs.temperature=${temperature} \
+    actor_rollout_ref.rollout.val_kwargs.top_p=${top_p} \
+    actor_rollout_ref.rollout.val_kwargs.top_k=${top_k} \
+    actor_rollout_ref.rollout.val_kwargs.do_sample=True \
+    actor_rollout_ref.rollout.val_kwargs.n=1 \
+    actor_rollout_ref.ref.fsdp_config.param_offload=${offload} \
+    actor_rollout_ref.ref.ulysses_sequence_parallel_size=${sp_size} \
+    actor_rollout_ref.actor.fsdp_config.fsdp_size=-1 \
+    reward_model.reward_manager=dapo \
+    reward_model.overlong_buffer.enable=${enable_overlong_buffer} \
+    reward_model.overlong_buffer.len=${overlong_buffer_len} \
+    reward_model.overlong_buffer.penalty_factor=${overlong_penalty_factor} \
+    trainer.logger=['console'] \
+    trainer.project_name="${project_name}" \
+    trainer.experiment_name="${exp_name}" \
+    trainer.n_gpus_per_node=16 \
+    trainer.nnodes="${NNODES}" \
+    trainer.val_before_train=False \
+    trainer.test_freq=5 \
+    trainer.save_freq=-1 \
+    trainer.total_epochs=1 \
+    trainer.total_training_steps=100 \
+    trainer.default_local_dir="${CKPTS_DIR}" \
+    trainer.resume_mode=auto \
+    data.shuffle=False \
+    actor_rollout_ref.actor.use_torch_compile=False \
+    actor_rollout_ref.ref.use_torch_compile=False \
+    actor_rollout_ref.actor.entropy_checkpointing=True \
+    actor_rollout_ref.ref.entropy_checkpointing=True \
+    actor_rollout_ref.actor.fsdp_config.forward_prefetch=True \
+    actor_rollout_ref.ref.fsdp_config.forward_prefetch=True \
+    actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \
+    actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \
+    actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \
+    actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True \
+    actor_rollout_ref.rollout.seed=1234 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_dapo_full.log 2>&1 &
+
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS，需要模型审视修改
+FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_dapo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'`
+
+#排除功能问题导致计算溢出的异常，增加健壮性
+if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then
+    FPS=""
+fi
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#打印，不需要修改
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+DeviceType=`uname -m`
+CaseName=${Network}_'16p'_'full'
+
+##获取性能数据，不需要修改
+#吞吐量
+ActualFPS=${FPS}
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log
diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh
index 6376ac0093..3269365d29 100644
--- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh
+++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh
@@ -1,6 +1,78 @@
-#!/usr/bin/env bash
-set -euxo pipefail
+#!/bin/bash
 
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+model_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Qwen2_5_7b_instruct_dapo_for_PyTorch"
+
+# 帮助信息，不需要修改
+if [[ $1 == --help || $1 == -h ]];then
+    echo"usage:./test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --data_path		           source data of training
+    --model_path		         model path for GRPO
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+#参数校验，不需要修改
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --model_path* ]];then
+        model_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be confing"
+    exit 1
+fi
+
+#非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录，需要模型审视修改
+cd $cur_path
+
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output
+    mkdir -p ${test_path_dir}/output
+else
+    mkdir -p ${test_path_dir}/output
+fi
+
+# 训练配置
 project_name='DAPO'
 exp_name='DAPO-Qwen2.5-7B-Instruct'
 
@@ -37,10 +109,10 @@ RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
 NNODES=${NNODES:-1}
 # Paths
 RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
-MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/Qwen2.5-7B-Instruct"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
 CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
-TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/DAPO-Math-17k/data/dapo-math-17k.parquet"}
-TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/AIME-2024/data/aime-2024.parquet"}
+TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"}
+TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"}
 
 # Algorithm
 temperature=1.0
@@ -55,7 +127,7 @@ infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size))
 offload=True
 gen_tp=1
 
-ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
+nohup ray job submit --runtime-env="${RUNTIME_ENV}" \
     --working-dir "${WORKING_DIR}" \
     -- python3 -m recipe.dapo.src.main_dapo \
     data.train_files="${TRAIN_FILE}" \
@@ -128,6 +200,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
     trainer.test_freq=5 \
     trainer.save_freq=-1 \
     trainer.total_epochs=1 \
+    trainer.total_training_steps=10 \
     trainer.default_local_dir="${CKPTS_DIR}" \
     trainer.resume_mode=auto \
     data.shuffle=False \
@@ -141,4 +214,40 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
     actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \
     actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \
     actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True \
-    actor_rollout_ref.rollout.seed=1234
+    actor_rollout_ref.rollout.seed=1234 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_dapo_perf.log 2>&1 &
+
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS，需要模型审视修改
+FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_dapo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'`
+
+#排除功能问题导致计算溢出的异常，增加健壮性
+if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then
+    FPS=""
+fi
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#打印，不需要修改
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+DeviceType=`uname -m`
+CaseName=${Network}_'16p'_'perf'
+
+##获取性能数据，不需要修改
+#吞吐量
+ActualFPS=${FPS}
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log
-- 
Gitee