From 74e1dec2b6b850bcd30069f302513f7ef5ebb772 Mon Sep 17 00:00:00 2001 From: libaokui Date: Sat, 14 Jun 2025 16:42:42 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E4=BA=8Emodelzoo=E6=8F=90=E4=BA=A4?= =?UTF-8?q?=E8=A7=84=E5=88=99=E5=AE=8C=E5=96=84=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../built-in/rl/VeRL_for_PyTorch/README.md | 20 +- .../test/runtime_env_32b.yaml | 5 - ...rain_qwen2_5_32b_instruct_DAPO_full_32p.sh | 251 +++++++++++++++++ ...en2_5_32b_instruct_DAPO_performance_32p.sh | 124 ++++++++- ...train_qwen2_5_7b_instruct_DAPO_full_16p.sh | 253 ++++++++++++++++++ ...wen2_5_7b_instruct_DAPO_performance_16p.sh | 123 ++++++++- 6 files changed, 753 insertions(+), 23 deletions(-) delete mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md index 0243527a24..a3d5acd1ad 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md @@ -265,17 +265,31 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 - 单机16卡训练 ```shell - bash test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh + bash test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 ``` - + + - 单机16卡性能 + + ```shell + bash test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + ``` + `Qwen2.5-32B-Instruct`模型支持双机32卡训练。 - 双机32卡训练 ```shell # 主节点执行 - bash test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh + bash test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh --data_path=xxx --model_path=xxx # 32卡训练 ``` + + - 双机32卡性能 + + ```shell + # 主节点执行 + bash 
test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh --data_path=xxx --model_path=xxx # 32卡性能 + ``` + # 训练结果展示 **表 2** 训练结果展示表 diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml deleted file mode 100644 index c3d76cfb0a..0000000000 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/runtime_env_32b.yaml +++ /dev/null @@ -1,5 +0,0 @@ -working_dir: ./ -excludes: ["/.git/"] -env_vars: - HCCL_CONNECT_TIMEOUT: "1500" - HCCL_EXEC_TIMEOUT: "1500" \ No newline at end of file diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh new file mode 100644 index 0000000000..5cf477b4a2 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# Dataset and model paths: leave empty here, they are filled from the command line
+data_path=""
+model_path=""
+
+# Basic parameters, to be reviewed per model
+# Network name, same as the directory name
+Network="Qwen2_5_32b_instruct_dapo_for_PyTorch"
+
+# Help message (note the space after echo: without it bash looks for a command named "echousage:...")
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./test/train_qwen2_5_32b_instruct_DAPO_full_32p.sh --data_path=<dir> --model_path=<dir>"
+    echo " "
+    echo "parameter explain:
+    --data_path           source data of training
+    --model_path          model path for DAPO
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" and quoted expansions keep values with spaces or globs intact
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path="${para#*=}"
+    elif [[ $para == --model_path* ]];then
+        model_path="${para#*=}"
+    fi
+done
+
+# Both --data_path and --model_path are mandatory
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be configured"
+    exit 1
+fi
+
+# Unless etp_running_flag=true is present in the environment, source the NPU environment script
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+
+# Training start time (epoch seconds)
+start_time=$(date +%s)
+
+# Enter the directory that contains test/; abort rather than run from the wrong place
+cd "$cur_path" || exit 1
+
+if [ -d "${test_path_dir}/output/${ASCEND_DEVICE_ID}" ];then
+    rm -rf -- "${test_path_dir:?}/output"
+    mkdir -p "${test_path_dir}/output"
+else
+    mkdir -p "${test_path_dir}/output"
+fi
+
+# Training configuration
+project_name='DAPO'
+exp_name='DAPO-Qwen2.5-32B-Instruct'
+
+adv_estimator=grpo
+
+use_kl_in_reward=False
+kl_coef=0.0
+use_kl_loss=False
+kl_loss_coef=0.0
+
+clip_ratio_low=0.2
+clip_ratio_high=0.28
+
+max_prompt_length=$((1024 * 2))
+max_response_length=$((1024 * 20))
+enable_overlong_buffer=True
+overlong_buffer_len=$((1024 * 4))
+overlong_penalty_factor=1.0
+
+loss_agg_mode="token-mean"
+
+enable_filter_groups=False
+filter_groups_metric=acc
+max_num_gen_batches=10
+train_prompt_bsz=16
+gen_prompt_bsz=$((train_prompt_bsz * 2))
+n_resp_per_prompt=16
+train_prompt_mini_bsz=1
+
+# Ray
+RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
+WORKING_DIR=${WORKING_DIR:-"${PWD}"} +RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"} +NNODES=${NNODES:-2} +# Paths +RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"} +MODEL_PATH=${MODEL_PATH:-"${model_path}"} +CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"} +TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"} +TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"} + +# Algorithm +temperature=1.0 +top_p=1.0 +top_k=-1 # 0 for HF rollout, -1 for vLLM rollout + +# Performance Related Parameter +sp_size=8 +use_dynamic_bsz=True +actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) +infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) +offload=True +gen_tp=4 + +nohup ray job submit --runtime-env="${RUNTIME_ENV}" \ + --working-dir "${WORKING_DIR}" \ + -- python3 -m recipe.dapo.src.main_dapo \ + data.train_files="${TRAIN_FILE}" \ + data.val_files="${TEST_FILE}" \ + data.prompt_key=prompt \ + data.truncation='left' \ + data.max_prompt_length=${max_prompt_length} \ + data.max_response_length=${max_response_length} \ + data.gen_batch_size=${gen_prompt_bsz} \ + data.train_batch_size=${train_prompt_bsz} \ + actor_rollout_ref.rollout.n=${n_resp_per_prompt} \ + algorithm.adv_estimator=${adv_estimator} \ + algorithm.use_kl_in_reward=${use_kl_in_reward} \ + algorithm.kl_ctrl.kl_coef=${kl_coef} \ + actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \ + actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \ + actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \ + actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \ + actor_rollout_ref.actor.clip_ratio_c=10.0 \ + algorithm.filter_groups.enable=${enable_filter_groups} \ + algorithm.filter_groups.max_num_gen_batches=${max_num_gen_batches} \ + algorithm.filter_groups.metric=${filter_groups_metric} \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} 
\ + actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \ + actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \ + actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \ + actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \ + actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \ + actor_rollout_ref.model.path="${MODEL_PATH}" \ + +actor_rollout_ref.model.override_config.attention_dropout=0. \ + +actor_rollout_ref.model.override_config.embd_pdrop=0. \ + +actor_rollout_ref.model.override_config.resid_pdrop=0. \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.actor.optim.lr_warmup_steps=10 \ + actor_rollout_ref.actor.optim.weight_decay=0.1 \ + actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \ + actor_rollout_ref.actor.fsdp_config.param_offload=${offload} \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.grad_clip=1.0 \ + actor_rollout_ref.actor.loss_agg_mode=${loss_agg_mode} \ + actor_rollout_ref.actor.ulysses_sequence_parallel_size=${sp_size} \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.50 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \ + actor_rollout_ref.rollout.enable_chunked_prefill=True \ + actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \ + actor_rollout_ref.rollout.temperature=${temperature} \ + actor_rollout_ref.rollout.top_p=${top_p} \ + actor_rollout_ref.rollout.top_k="${top_k}" \ + actor_rollout_ref.rollout.val_kwargs.temperature=${temperature} \ + actor_rollout_ref.rollout.val_kwargs.top_p=${top_p} \ + actor_rollout_ref.rollout.val_kwargs.top_k=${top_k} \ + actor_rollout_ref.rollout.val_kwargs.do_sample=True \ + actor_rollout_ref.rollout.val_kwargs.n=1 \ + 
actor_rollout_ref.ref.fsdp_config.param_offload=${offload} \ + actor_rollout_ref.ref.ulysses_sequence_parallel_size=${sp_size} \ + actor_rollout_ref.actor.fsdp_config.fsdp_size=-1 \ + reward_model.reward_manager=dapo \ + reward_model.overlong_buffer.enable=${enable_overlong_buffer} \ + reward_model.overlong_buffer.len=${overlong_buffer_len} \ + reward_model.overlong_buffer.penalty_factor=${overlong_penalty_factor} \ + trainer.logger=['console'] \ + trainer.project_name="${project_name}" \ + trainer.experiment_name="${exp_name}" \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes="${NNODES}" \ + trainer.val_before_train=False \ + trainer.test_freq=5 \ + trainer.save_freq=-1 \ + trainer.total_epochs=1 \ + trainer.total_training_steps=100 \ + trainer.default_local_dir="${CKPTS_DIR}" \ + trainer.resume_mode=auto \ + data.shuffle=False \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.ref.use_torch_compile=False \ + actor_rollout_ref.actor.entropy_checkpointing=True \ + actor_rollout_ref.ref.entropy_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.forward_prefetch=True \ + actor_rollout_ref.ref.fsdp_config.forward_prefetch=True \ + actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \ + actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \ + actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \ + actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_dapo_full.log 2>&1 & + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_dapo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == 
x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh index 8df492ae97..457a060dce 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh @@ -1,6 +1,78 @@ -#!/usr/bin/env bash -set -euxo pipefail +#!/bin/bash +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# Dataset and model paths: leave empty here, they are filled from the command line
+data_path=""
+model_path=""
+
+# Basic parameters, to be reviewed per model
+# Network name, same as the directory name
+Network="Qwen2_5_32b_instruct_dapo_for_PyTorch"
+
+# Help message (note the space after echo: without it bash looks for a command named "echousage:...")
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./test/train_qwen2_5_32b_instruct_DAPO_performance_32p.sh --data_path=<dir> --model_path=<dir>"
+    echo " "
+    echo "parameter explain:
+    --data_path           source data of training
+    --model_path          model path for DAPO
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" and quoted expansions keep values with spaces or globs intact
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path="${para#*=}"
+    elif [[ $para == --model_path* ]];then
+        model_path="${para#*=}"
+    fi
+done
+
+# Both --data_path and --model_path are mandatory
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be configured"
+    exit 1
+fi
+
+# Unless etp_running_flag=true is present in the environment, source the NPU environment script
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+
+# Training start time (epoch seconds)
+start_time=$(date +%s)
+
+# Enter the directory that contains test/; abort rather than run from the wrong place
+cd "$cur_path" || exit 1
+
+if [ -d "${test_path_dir}/output/${ASCEND_DEVICE_ID}" ];then
+    rm -rf -- "${test_path_dir:?}/output"
+    mkdir -p "${test_path_dir}/output"
+else
+    mkdir -p "${test_path_dir}/output"
+fi
+
+# Training configuration
 project_name='DAPO'
 exp_name='DAPO-Qwen2.5-32B-Instruct'
 
@@ -33,14 +105,14 @@ train_prompt_mini_bsz=1
 # Ray
 RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
 WORKING_DIR=${WORKING_DIR:-"${PWD}"}
-RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env_32b.yaml"}
+RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
 NNODES=${NNODES:-2}
 # Paths
 RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
-MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/Qwen2.5-32B-Instruct"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
 CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
-TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/DAPO-Math-17k/data/dapo-math-17k.parquet"} -TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/AIME-2024/data/aime-2024.parquet"} +TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"} +TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"} # Algorithm temperature=1.0 @@ -55,7 +127,7 @@ infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) offload=True gen_tp=4 -ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ +nohup ray job submit --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ -- python3 -m recipe.dapo.src.main_dapo \ data.train_files="${TRAIN_FILE}" \ @@ -128,6 +200,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ trainer.test_freq=5 \ trainer.save_freq=-1 \ trainer.total_epochs=1 \ + trainer.total_training_steps=10 \ trainer.default_local_dir="${CKPTS_DIR}" \ trainer.resume_mode=auto \ data.shuffle=False \ @@ -140,4 +213,39 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \ actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \ actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \ - actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True \ No newline at end of file + actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_dapo_perf.log 2>&1 & + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_dapo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : 
$FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh new file mode 100644 index 0000000000..a4a084f3d0 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh @@ -0,0 +1,253 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# Dataset and model paths: leave empty here, they are filled from the command line
+data_path=""
+model_path=""
+
+# Basic parameters, to be reviewed per model
+# Network name, same as the directory name
+Network="Qwen2_5_7b_instruct_dapo_for_PyTorch"
+
+# Help message (note the space after echo: without it bash looks for a command named "echousage:...")
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./test/train_qwen2_5_7b_instruct_DAPO_full_16p.sh --data_path=<dir> --model_path=<dir>"
+    echo " "
+    echo "parameter explain:
+    --data_path           source data of training
+    --model_path          model path for DAPO
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" and quoted expansions keep values with spaces or globs intact
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path="${para#*=}"
+    elif [[ $para == --model_path* ]];then
+        model_path="${para#*=}"
+    fi
+done
+
+# Both --data_path and --model_path are mandatory
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be configured"
+    exit 1
+fi
+
+# Unless etp_running_flag=true is present in the environment, source the NPU environment script
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+
+# Training start time (epoch seconds)
+start_time=$(date +%s)
+
+# Enter the directory that contains test/; abort rather than run from the wrong place
+cd "$cur_path" || exit 1
+
+if [ -d "${test_path_dir}/output/${ASCEND_DEVICE_ID}" ];then
+    rm -rf -- "${test_path_dir:?}/output"
+    mkdir -p "${test_path_dir}/output"
+else
+    mkdir -p "${test_path_dir}/output"
+fi
+
+# Training configuration
+project_name='DAPO'
+exp_name='DAPO-Qwen2.5-7B-Instruct'
+
+adv_estimator=grpo
+
+use_kl_in_reward=False
+kl_coef=0.0
+use_kl_loss=False
+kl_loss_coef=0.0
+
+clip_ratio_low=0.2
+clip_ratio_high=0.28
+
+max_prompt_length=$((1024 * 2))
+max_response_length=$((1024 * 20))
+enable_overlong_buffer=True
+overlong_buffer_len=$((1024 * 4))
+overlong_penalty_factor=1.0
+
+loss_agg_mode="token-mean"
+
+enable_filter_groups=False
+filter_groups_metric=acc
+max_num_gen_batches=10
+train_prompt_bsz=16
+gen_prompt_bsz=$((train_prompt_bsz * 3))
+n_resp_per_prompt=16
+train_prompt_mini_bsz=1
+
+# Ray
+RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
+WORKING_DIR=${WORKING_DIR:-"${PWD}"} +RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"} +NNODES=${NNODES:-1} +# Paths +RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"} +MODEL_PATH=${MODEL_PATH:-"${model_path}"} +CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"} +TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"} +TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"} + +# Algorithm +temperature=1.0 +top_p=1.0 +top_k=-1 # 0 for HF rollout, -1 for vLLM rollout + +# Performance Related Parameter +sp_size=4 +use_dynamic_bsz=True +actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) +infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) +offload=True +gen_tp=1 + +nohup ray job submit --runtime-env="${RUNTIME_ENV}" \ + --working-dir "${WORKING_DIR}" \ + -- python3 -m recipe.dapo.src.main_dapo \ + data.train_files="${TRAIN_FILE}" \ + data.val_files="${TEST_FILE}" \ + data.prompt_key=prompt \ + data.truncation='left' \ + data.max_prompt_length=${max_prompt_length} \ + data.max_response_length=${max_response_length} \ + data.gen_batch_size=${gen_prompt_bsz} \ + data.train_batch_size=${train_prompt_bsz} \ + actor_rollout_ref.rollout.n=${n_resp_per_prompt} \ + algorithm.adv_estimator=${adv_estimator} \ + algorithm.use_kl_in_reward=${use_kl_in_reward} \ + algorithm.kl_ctrl.kl_coef=${kl_coef} \ + actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \ + actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \ + actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \ + actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \ + actor_rollout_ref.actor.clip_ratio_c=10.0 \ + algorithm.filter_groups.enable=${enable_filter_groups} \ + algorithm.filter_groups.max_num_gen_batches=${max_num_gen_batches} \ + algorithm.filter_groups.metric=${filter_groups_metric} \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} 
\ + actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \ + actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \ + actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \ + actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \ + actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \ + actor_rollout_ref.model.path="${MODEL_PATH}" \ + +actor_rollout_ref.model.override_config.attention_dropout=0. \ + +actor_rollout_ref.model.override_config.embd_pdrop=0. \ + +actor_rollout_ref.model.override_config.resid_pdrop=0. \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.actor.optim.lr_warmup_steps=10 \ + actor_rollout_ref.actor.optim.weight_decay=0.1 \ + actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \ + actor_rollout_ref.actor.fsdp_config.param_offload=${offload} \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.grad_clip=1.0 \ + actor_rollout_ref.actor.loss_agg_mode=${loss_agg_mode} \ + actor_rollout_ref.actor.ulysses_sequence_parallel_size=${sp_size} \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.50 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \ + actor_rollout_ref.rollout.enable_chunked_prefill=True \ + actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \ + actor_rollout_ref.rollout.temperature=${temperature} \ + actor_rollout_ref.rollout.top_p=${top_p} \ + actor_rollout_ref.rollout.top_k="${top_k}" \ + actor_rollout_ref.rollout.val_kwargs.temperature=${temperature} \ + actor_rollout_ref.rollout.val_kwargs.top_p=${top_p} \ + actor_rollout_ref.rollout.val_kwargs.top_k=${top_k} \ + actor_rollout_ref.rollout.val_kwargs.do_sample=True \ + actor_rollout_ref.rollout.val_kwargs.n=1 \ + 
actor_rollout_ref.ref.fsdp_config.param_offload=${offload} \ + actor_rollout_ref.ref.ulysses_sequence_parallel_size=${sp_size} \ + actor_rollout_ref.actor.fsdp_config.fsdp_size=-1 \ + reward_model.reward_manager=dapo \ + reward_model.overlong_buffer.enable=${enable_overlong_buffer} \ + reward_model.overlong_buffer.len=${overlong_buffer_len} \ + reward_model.overlong_buffer.penalty_factor=${overlong_penalty_factor} \ + trainer.logger=['console'] \ + trainer.project_name="${project_name}" \ + trainer.experiment_name="${exp_name}" \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes="${NNODES}" \ + trainer.val_before_train=False \ + trainer.test_freq=5 \ + trainer.save_freq=-1 \ + trainer.total_epochs=1 \ + trainer.total_training_steps=100 \ + trainer.default_local_dir="${CKPTS_DIR}" \ + trainer.resume_mode=auto \ + data.shuffle=False \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.ref.use_torch_compile=False \ + actor_rollout_ref.actor.entropy_checkpointing=True \ + actor_rollout_ref.ref.entropy_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.forward_prefetch=True \ + actor_rollout_ref.ref.fsdp_config.forward_prefetch=True \ + actor_rollout_ref.actor.fsdp_config.backward_prefetch=BACKWARD_PRE \ + actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \ + actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \ + actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True \ + actor_rollout_ref.rollout.seed=1234 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_dapo_full.log 2>&1 & + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_dapo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ 
x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh index 6376ac0093..3269365d29 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh @@ -1,6 +1,78 @@ -#!/usr/bin/env bash -set -euxo pipefail +#!/bin/bash +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# Dataset and model paths: leave empty here, they are filled from the command line
+data_path=""
+model_path=""
+
+# Basic parameters, to be reviewed per model
+# Network name, same as the directory name
+Network="Qwen2_5_7b_instruct_dapo_for_PyTorch"
+
+# Help message (note the space after echo: without it bash looks for a command named "echousage:...")
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./test/train_qwen2_5_7b_instruct_DAPO_performance_16p.sh --data_path=<dir> --model_path=<dir>"
+    echo " "
+    echo "parameter explain:
+    --data_path           source data of training
+    --model_path          model path for DAPO
+    -h/--help             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" and quoted expansions keep values with spaces or globs intact
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path="${para#*=}"
+    elif [[ $para == --model_path* ]];then
+        model_path="${para#*=}"
+    fi
+done
+
+# Both --data_path and --model_path are mandatory
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+if [[ $model_path == "" ]];then
+    echo "[Error] para \"model_path\" must be configured"
+    exit 1
+fi
+
+# Unless etp_running_flag=true is present in the environment, source the NPU environment script
+check_etp_flag=$(env | grep etp_running_flag)
+etp_flag=$(echo ${check_etp_flag#*=})
+if [ x"${etp_flag}" != x"true" ];then
+    source "${test_path_dir}/env_npu.sh"
+fi
+
+# Training start time (epoch seconds)
+start_time=$(date +%s)
+
+# Enter the directory that contains test/; abort rather than run from the wrong place
+cd "$cur_path" || exit 1
+
+if [ -d "${test_path_dir}/output/${ASCEND_DEVICE_ID}" ];then
+    rm -rf -- "${test_path_dir:?}/output"
+    mkdir -p "${test_path_dir}/output"
+else
+    mkdir -p "${test_path_dir}/output"
+fi
+
+# Training configuration
 project_name='DAPO'
 exp_name='DAPO-Qwen2.5-7B-Instruct'
 
@@ -37,10 +109,10 @@ RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/test/runtime_env.yaml"}
 NNODES=${NNODES:-1}
 # Paths
 RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
-MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/Qwen2.5-7B-Instruct"}
+MODEL_PATH=${MODEL_PATH:-"${model_path}"}
 CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
-TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/DAPO-Math-17k/data/dapo-math-17k.parquet"}
-TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/AIME-2024/data/aime-2024.parquet"}
+TRAIN_FILE=${TRAIN_FILE:-"${data_path}/dapo-math-17k.parquet"}
+TEST_FILE=${TEST_FILE:-"${data_path}/aime-2024.parquet"} # Algorithm temperature=1.0 @@ -55,7 +127,7 @@ infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / sp_size)) offload=True gen_tp=1 -ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ +nohup ray job submit --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ -- python3 -m recipe.dapo.src.main_dapo \ data.train_files="${TRAIN_FILE}" \ @@ -128,6 +200,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ trainer.test_freq=5 \ trainer.save_freq=-1 \ trainer.total_epochs=1 \ + trainer.total_training_steps=10 \ trainer.default_local_dir="${CKPTS_DIR}" \ trainer.resume_mode=auto \ data.shuffle=False \ @@ -141,4 +214,40 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ actor_rollout_ref.ref.fsdp_config.backward_prefetch=BACKWARD_PRE \ actor_rollout_ref.actor.use_entropy_from_logits_with_chunking=True \ actor_rollout_ref.ref.use_entropy_from_logits_with_chunking=True \ - actor_rollout_ref.rollout.seed=1234 + actor_rollout_ref.rollout.seed=1234 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_dapo_perf.log 2>&1 & + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_dapo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo 
"DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log -- Gitee