diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md
index 708b2243700472741188235213ccc52ecf4fe79b..91c8165508b45c54b6a6a030422ac3c3af64f9c4 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md
@@ -126,13 +126,11 @@
 cd ${wenet_path}
 git checkout .
 patch -p1 < get_no_flash_encoder_out.diff
 cd ${wenet_path}/examples/aishell/s0/
-bash run_no_flash_encoder_out.sh
-mv encoder_data_noflash encoder_data
+bash run_no_flash_encoder_out.sh --bin_path encoder_data_noflash --model_path ./no_flash_encoder_revise.om --json_path encoder_noflash.json --perf_json t1.json
 ```
 
 - Get the decoder results for the non-streaming scenario:
 
-  Note: modify the json_path in wenet/bin/recognize_attension_rescoring.py; for the measured performance to be reported, json_path must contain the keyword "no"
 
 ```
@@ -140,7 +138,7 @@
 cd ${wenet_path}
 git checkout .
 patch -p1 < getwer.diff
 cd ${wenet_path}/examples/aishell/s0/
-bash run_attention_rescoring.sh
+bash run_attention_rescoring.sh --model_path ./decoder_final.om --perf_json t2.json --json_path encoder_noflash.json --bin_path encoder_data_noflash
 ```
 - Check the overall accuracy
@@ -171,7 +169,7 @@
 cd ${wenet_path}
 git checkout .
 patch -p1 < get_flash_encoder_out.diff
 cd ${wenet_path}/examples/aishell/s0/
-bash run_encoder_out.sh
+bash run_encoder_out.sh --bin_path encoder_data --model_path encoder_revise.om --json_path encoder.json
 ```
 - Get the decoder results for the streaming scenario:
@@ -184,7 +182,7 @@
 cd ${wenet_path}
 git checkout .
 patch -p1 < getwer.diff
 cd ${wenet_path}/examples/aishell/s0/
-bash run_attention_rescoring.sh
+bash run_attention_rescoring.sh --model_path ./decoder_final.om --json_path encoder.json --bin_path ./encoder_data
 ```
 - Check the overall accuracy
@@ -216,14 +214,13 @@
 cp ${code_path}/decoder_fendang.om ${wenet_path}/
 
 - Accuracy test:
 
-  Set the log level with export ASCEND_GLOBAL_LOG_LEVEL=3; in acc.diff, point the files loaded by self.encoder_ascend and self.decoder_ascend at the statically converted encoder and decoder models; in run.sh set average_checkpoint to false, change decode_modes to attention_rescoring and set stage=5; in the decode stage change python to python3.7.5 on lines 185 and 198, and change recognize.py to static.py on line 185
 ```
 cd ${wenet_path}/
 git checkout .
 patch -p1 < acc.diff
 cd ${wenet_path}/examples/aishell/s0/
-bash run.sh --stage 5 --stop_stage 5
+bash run_static.sh
 ```
 
 - Check the overall accuracy
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_flash.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_flash.py
index 690b4f5bdeff7dda54c5de1dab23f10345316849..ff17af1345adafa662a3b0b31f519723ebb5d1a5 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_flash.py
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_flash.py
@@ -54,7 +54,7 @@ import os
 def dic2json(input_dict, json_path):
     json_str = json.dumps(input_dict)
-    with open(json_path, 'a') as json_file:
+    with open(json_path, 'w+') as json_file:
         json_file.write(json_str)
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='recognize with your model')
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py
index 945465ca0f674f6fba9ff5791907bce2a7fa1037..88946937c40343a9240ab2b5075da58b215a131d 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py
@@ -53,7 +53,7 @@ import acl
 from wenet.transformer.acl_net import Net
 def dic2json(input_dict, json_path):
     json_str = json.dumps(input_dict)
-    with open(json_path, 'a') as json_file:
+    with open(json_path, 'w+') as json_file:
         json_file.write(json_str)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='recognize with your model')
@@ -77,6 +77,7 @@ if __name__ == '__main__':
     parser.add_argument('--bin_path', type=str, default="./encoder_data_noflash", help='encoder bin images dir')
     parser.add_argument('--model_path', type=str, default="no_flash_encoder_revise.om", help='encoder bin images dir')
     parser.add_argument('--json_path', type=str, default="encoder_noflash_all.json", help='encoder bin images dir')
+    parser.add_argument('--perf_json', type=str, default="t1.json", help='no flash encoder time')
     parser.add_argument('--batch_size',
                         type=int,
                         default=16,
@@ -209,6 +210,6 @@ if __name__ == '__main__':
     ave_t = total_t / (batch_idx + 1)
     dic_perf = {}
     dic_perf["t1"] = ave_t
-    dic2json(dic_perf, "t1.json")
+    dic2json(dic_perf, args.perf_json)
     dic2json(encoder_dic, args.json_path)
     del encoder_model_noflash
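The `dic2json` helper recurs in all three Python entry points (including `recognize_attenstion_rescoring.py` below); opening with `'w+'` instead of `'a'` means a rerun overwrites the result file rather than appending a second JSON object to it, which would make the file unparseable. A minimal sketch of the failure mode, using a throwaway `demo.json` path:

```python
import json

# Old 'a' mode: two runs leave two concatenated JSON objects behind,
# and json.loads() rejects the file with "Extra data".
with open('demo.json', 'a') as f:
    f.write(json.dumps({"t1": 1.0}))
with open('demo.json', 'a') as f:
    f.write(json.dumps({"t1": 2.0}))
try:
    json.loads(open('demo.json').read())
except json.JSONDecodeError as e:
    print('append mode corrupts the file:', e)

# New 'w+' mode: the file is truncated on open, so each run leaves
# exactly one valid JSON document.
with open('demo.json', 'w+') as f:
    f.write(json.dumps({"t1": 2.0}))
print(json.loads(open('demo.json').read()))  # {'t1': 2.0}
```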
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py
index 1c8850d6e7cda424f39c58a1bff697269444af28..6bc3c672608154579a4be1326f1c5428c677bc8b 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py
@@ -55,7 +55,7 @@ import os
 
 def dic2json(input_dict, json_path):
     json_str = json.dumps(input_dict)
-    with open(json_path, 'a') as json_file:
+    with open(json_path, 'w+') as json_file:
         json_file.write(json_str)
 
 if __name__ == '__main__':
@@ -75,6 +75,7 @@ if __name__ == '__main__':
     parser.add_argument('--bin_path', type=str, default="./encoder_data", help='encoder bin images dir')
     parser.add_argument('--model_path', type=str, default="decoder_final.om", help='encoder bin images dir')
     parser.add_argument('--json_path', type=str, default="encoder.json", help='encoder bin images dir')
+    parser.add_argument('--perf_json', type=str, default="t2.json", help='decoder time')
     parser.add_argument('--penalty',
                         type=float,
                         default=0.0,
@@ -201,6 +202,5 @@ if __name__ == '__main__':
     ave_t = total_t / (batch_idx + 1)
     dic_perf = {}
     dic_perf["t2"] = ave_t
-    if "no" in args.bin_path:
-        dic2json(dic_perf, "t2.json")
+    dic2json(dic_perf, args.perf_json)
     del decoder_model
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_attention_rescoring.sh b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_attention_rescoring.sh
index a97faa7335f16b1fbf4e6e38d8d5be90069a5af3..97743363e9b537bd11ffeed978bc487a336725b8 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_attention_rescoring.sh
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_attention_rescoring.sh
@@ -48,7 +48,10 @@ average_checkpoint=false
 decode_checkpoint=$dir/final.pt
 average_num=30
 decode_modes="attention_rescoring"
-
+model_path=./decoder_final.om
+perf_json=./t2.json
+json_path=./encoder.json
+bin_path=./encoder_data
 . tools/parse_options.sh || exit 1;
 
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
@@ -79,6 +82,10 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
       --batch_size 1 \
       --penalty 0.0 \
       --dict $dict \
+      --model_path ${model_path} \
+      --json_path ${json_path} \
+      --bin_path ${bin_path} \
+      --perf_json ${perf_json} \
       --ctc_weight $ctc_weight \
       --reverse_weight $reverse_weight \
       --result_file $test_dir/text \
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_encoder_out.sh b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_encoder_out.sh
index 4fbf960942d169039c852c31178b3b104f9b5145..1331892e2226158c9bb6664f1a8943146c6a3712 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_encoder_out.sh
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_encoder_out.sh
@@ -48,6 +48,9 @@ average_checkpoint=false
 decode_checkpoint=$dir/final.pt
 average_num=30
 decode_modes="attention_rescoring"
+bin_path=./encoder_data
+model_path=./encoder_revise.om
+json_path=./encoder.json
 
 . tools/parse_options.sh || exit 1;
 
@@ -80,6 +83,9 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
       --penalty 0.0 \
       --dict $dict \
       --ctc_weight $ctc_weight \
+      --bin_path ${bin_path} \
+      --model_path ${model_path} \
+      --json_path ${json_path} \
       --reverse_weight $reverse_weight \
       --result_file $test_dir/text \
       --simulate_streaming \
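With the `if "no" in args.bin_path` guard removed, both the encoder and decoder scripts always write their average per-batch time to an explicit `--perf_json` path: `{"t1": ...}` for the encoder run and `{"t2": ...}` for the decoder run. A hypothetical reporting helper, not part of the patch, that reads the two files back after both runs:

```python
import json

# Hypothetical helper (not in the patch): the scripts above write
# {"t1": <avg per batch>} to t1.json and {"t2": <avg per batch>} to t2.json.
def load_avg(path, key):
    with open(path) as f:
        return json.load(f)[key]

t1 = load_avg('t1.json', 't1')  # no-flash encoder, average per batch
t2 = load_avg('t2.json', 't2')  # decoder rescoring, average per batch
print(f'encoder: {t1:.4f}  decoder: {t2:.4f}  combined: {t1 + t2:.4f} (per batch)')
```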
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_no_flash_encoder_out.sh b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_no_flash_encoder_out.sh
index 0ff98b11f617008dc4396af29e4f1c43ecf0bedb..dfed9dbf2ff089e7a7bab8eab1be5cd0fde9d337 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_no_flash_encoder_out.sh
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_no_flash_encoder_out.sh
@@ -48,7 +48,10 @@ average_checkpoint=false
 decode_checkpoint=$dir/final.pt
 average_num=30
 decode_modes="attention_rescoring"
-
+bin_path=./encoder_data_noflash
+model_path=./no_flash_encoder_revise.om
+json_path=./encoder_noflash.json
+perf_json=./t1.json
 . tools/parse_options.sh || exit 1;
 
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
@@ -78,6 +81,10 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
       --batch_size 1 \
       --penalty 0.0 \
       --dict $dict \
+      --bin_path ${bin_path} \
+      --model_path ${model_path} \
+      --json_path ${json_path} \
+      --perf_json ${perf_json} \
       --ctc_weight $ctc_weight \
       --reverse_weight $reverse_weight \
       --result_file $test_dir/text \
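Each `run_*.sh` declares its new defaults (`bin_path`, `model_path`, `json_path`, `perf_json`) before sourcing `tools/parse_options.sh`, the Kaldi-style parser that lets a `--name value` command-line flag override the like-named shell variable; that is what makes the README invocations such as `bash run_no_flash_encoder_out.sh --bin_path encoder_data_noflash ...` work without editing the scripts. A rough Python analogue of the override mechanism, for illustration only:

```python
import sys

# Illustrative analogue of Kaldi's tools/parse_options.sh:
# '--name value' overrides a default declared before parsing;
# unknown option names are rejected.
def parse_options(argv, defaults):
    opts = dict(defaults)
    args = iter(argv)
    for arg in args:
        if not arg.startswith('--'):
            continue
        name = arg[2:].replace('-', '_')
        if name not in opts:
            sys.exit(f'unknown option --{name}')
        value = next(args, None)
        if value is None:
            sys.exit(f'missing value for --{name}')
        opts[name] = value
    return opts

defaults = {'bin_path': './encoder_data_noflash',
            'model_path': './no_flash_encoder_revise.om',
            'json_path': './encoder_noflash.json',
            'perf_json': './t1.json'}
print(parse_options(sys.argv[1:], defaults))
```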
diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_static.sh b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_static.sh
index 5a67f453f817cc2a4d8ce183575f4ae5ccbf6220..daf604683ddfa1885cd2d314fc9e20b986ee2812 100644
--- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_static.sh
+++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/run_static.sh
@@ -51,12 +51,123 @@ decode_modes="attention_rescoring"
 
 . tools/parse_options.sh || exit 1;
 
+if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
+    echo "stage -1: Data Download"
+    local/download_and_untar.sh ${data} ${data_url} data_aishell
+    local/download_and_untar.sh ${data} ${data_url} resource_aishell
+fi
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # Data preparation
+    local/aishell_data_prep.sh ${data}/data_aishell/wav ${data}/data_aishell/transcript
+fi
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # remove the space between the text labels for Mandarin dataset
+    for x in train dev test; do
+        cp data/${x}/text data/${x}/text.org
+        paste -d " " <(cut -f 1 -d" " data/${x}/text.org) <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \
+            > data/${x}/text
+        rm data/${x}/text.org
+    done
+    # For wav feature, just copy the data. Fbank extraction is done in training
+    mkdir -p $feat_dir
+    for x in ${train_set} dev test; do
+        cp -r data/$x $feat_dir
+    done
+
+    tools/compute_cmvn_stats.py --num_workers 16 --train_config $train_config \
+        --in_scp data/${train_set}/wav.scp \
+        --out_cmvn $feat_dir/$train_set/global_cmvn
+fi
+
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # Make train dict
+    echo "Make a dictionary"
+    mkdir -p $(dirname $dict)
+    echo "<blank> 0" > ${dict}  # 0 will be used for "blank" in CTC
+    echo "<unk> 1" >> ${dict}   # <unk> must be 1
+    tools/text2token.py -s 1 -n 1 data/train/text | cut -f 2- -d" " | tr " " "\n" \
+        | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict}
+    num_token=$(cat $dict | wc -l)
+    echo "<sos/eos> $num_token" >> $dict  # <eos>
+fi
+
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    nj=32
+    # Prepare wenet required data
+    echo "Prepare data, prepare required format"
+    for x in dev test ${train_set}; do
+        tools/format_data.sh --nj ${nj} \
+            --feat-type wav --feat $feat_dir/$x/wav.scp \
+            $feat_dir/$x ${dict} > $feat_dir/$x/format.data.tmp
+
+        tools/remove_longshortdata.py \
+            --min_input_len 0.5 \
+            --max_input_len 20 \
+            --max_output_len 400 \
+            --max_output_input_ratio 10.0 \
+            --data_file $feat_dir/$x/format.data.tmp \
+            --output_data_file $feat_dir/$x/format.data
+    done
+fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # Training
+    mkdir -p $dir
+    INIT_FILE=$dir/ddp_init
+    # You had better rm it manually before you start run.sh on first node.
+    # rm -f $INIT_FILE # delete old one before starting
+    init_method=file://$(readlink -f $INIT_FILE)
+    echo "$0: init method is $init_method"
+    # The number of gpus running on each node/machine
+    num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+    # Use "nccl" if it works, otherwise use "gloo"
+    dist_backend="nccl"
+    # The total number of processes/gpus, so that the master knows
+    # how many workers to wait for.
+    # More details about ddp can be found in
+    # https://pytorch.org/tutorials/intermediate/dist_tuto.html
+    world_size=`expr $num_gpus \* $num_nodes`
+    echo "total gpus is: $world_size"
+    cmvn_opts=
+    $cmvn && cp ${feat_dir}/${train_set}/global_cmvn $dir
+    $cmvn && cmvn_opts="--cmvn ${dir}/global_cmvn"
+    # train.py will write $train_config to $dir/train.yaml with model input
+    # and output dimension; train.yaml will be used for inference or model
+    # export later
+    for ((i = 0; i < $num_gpus; ++i)); do
+    {
+        gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
+        # Rank of each gpu/process, used for knowing whether it is
+        # the master or a worker.
+        rank=`expr $node_rank \* $num_gpus + $i`
+        python wenet/bin/train.py --gpu $gpu_id \
+            --config $train_config \
+            --train_data $feat_dir/$train_set/format.data \
+            --cv_data $feat_dir/dev/format.data \
+            ${checkpoint:+--checkpoint $checkpoint} \
+            --model_dir $dir \
+            --ddp.init_method $init_method \
+            --ddp.world_size $world_size \
+            --ddp.rank $rank \
+            --ddp.dist_backend $dist_backend \
+            --num_workers 2 \
+            $cmvn_opts \
+            --pin_memory
+    } &
+    done
+    wait
+fi
+
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # Test model, please specify the model you want to test by --checkpoint
     if [ ${average_checkpoint} == true ]; then
         decode_checkpoint=$dir/avg_${average_num}.pt
         echo "do model average and final checkpoint is $decode_checkpoint"
-        python3 wenet/bin/average_model.py \
+        python wenet/bin/average_model.py \
             --dst_model $decode_checkpoint \
             --src_path $dir \
             --num ${average_num} \
@@ -65,13 +176,14 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # Specify decoding_chunk_size if it's a unified dynamic chunk trained model
     # -1 for full chunk
     decoding_chunk_size=
-    ctc_weight=0.3
-    reverse_weight=0.3
+    ctc_weight=0.5
+    reverse_weight=0.0
     for mode in ${decode_modes}; do
     {
         test_dir=$dir/test_${mode}
         mkdir -p $test_dir
-        python3.7 wenet/bin/static.py --gpu -1 \
+        python3.7.5 wenet/bin/static.py --gpu 0 \
+            --mode $mode \
             --config $dir/train.yaml \
             --test_data $feat_dir/test/format.data \
             --checkpoint $decode_checkpoint \
@@ -82,10 +194,8 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
             --ctc_weight $ctc_weight \
             --reverse_weight $reverse_weight \
             --result_file $test_dir/text \
-            --simulate_streaming \
-            --decoding_chunk_size 1
             ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size}
-        python3.7 tools/compute-wer.py --char=1 --v=1 \
+        python3.7.5 tools/compute-wer.py --char=1 --v=1 \
             $feat_dir/test/text $test_dir/text > $test_dir/wer
     } &
     done
@@ -93,3 +203,60 @@
 fi
 
+if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
+    # Export the best model you want
+    python wenet/bin/export_jit.py \
+        --config $dir/train.yaml \
+        --checkpoint $dir/avg_${average_num}.pt \
+        --output_file $dir/final.zip \
+        --output_quant_file $dir/final_quant.zip
+fi
+
+# Optionally, you can add LM and test it with runtime.
+if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
+    # 7.1 Prepare dict
+    unit_file=$dict
+    mkdir -p data/local/dict
+    cp $unit_file data/local/dict/units.txt
+    tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \
+        data/local/dict/lexicon.txt
+    # 7.2 Train lm
+    lm=data/local/lm
+    mkdir -p $lm
+    tools/filter_scp.pl data/train/text \
+        $data/data_aishell/transcript/aishell_transcript_v0.8.txt > $lm/text
+    local/aishell_train_lms.sh
+    # 7.3 Build decoding TLG
+    tools/fst/compile_lexicon_token_fst.sh \
+        data/local/dict data/local/tmp data/local/lang
+    tools/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1;
+    # 7.4 Decoding with runtime
+    # reverse_weight only works for u2++ models; only the left-to-right decoder is used when it is set to 0.0.
+    reverse_weight=0.0
+    chunk_size=-1
+    ./tools/decode.sh --nj 16 \
+        --beam 15.0 --lattice_beam 7.5 --max_active 7000 \
+        --blank_skip_thresh 0.98 --ctc_weight 0.5 --rescoring_weight 1.0 \
+        --reverse_weight $reverse_weight --chunk_size $chunk_size \
+        --fst_path data/lang_test/TLG.fst \
+        data/test/wav.scp data/test/text $dir/final.zip \
+        data/lang_test/words.txt $dir/lm_with_runtime
+    # See $dir/lm_with_runtime for wer
+fi
+
+if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
+    # Test model, please specify the model you want to use by --checkpoint
+    # alignment input
+    ali_format=$feat_dir/test/format.data
+    # alignment output
+    ali_result=$dir/ali
+    python wenet/bin/alignment.py --gpu -1 \
+        --config $dir/train.yaml \
+        --input_file $ali_format \
+        --checkpoint $checkpoint \
+        --batch_size 1 \
+        --dict $dict \
+        --result_file $ali_result \
+        --gen_praat
+fi