diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py
index 9c2484f701f83ae038ff20d6c52c4b7a7de1eef4..ac8db9f8d029fb0872ceec5ea0abde5e3ee1009b 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py
@@ -294,7 +294,7 @@ class MultiheadAttention(nn.Module):
         attn_weights = F.softmax(attn_weights, dim=-1)
 
         if self.training:
-            attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout)
+            attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training)
 
         attn = strided_bmm2(attn_weights, v)
         assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
index 48e7fb3af7decbc497fd4f94d3464f1fc9046a98..6761832491b7719b9b6b7f7c33cef6143a88ff21 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh
@@ -62,8 +62,11 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0
 export ASCEND_GLOBAL_LOG_LEVEL_ETP=3
 export PTCOPY_ENABLE=1
 export TASK_QUEUE_ENABLE=1
-export DYNAMIC_OP="ADD#MUL"
+#export DYNAMIC_OP="ADD#MUL"
+export COMBINED_ENABLE=1
+export SCALAR_TO_HOST_MEM=1
 start_time=$(date +%s)
+
 python3 -u train_1p.py \
 $data_path \
 --device-id ${ASCEND_DEVICE_ID}\
@@ -81,7 +84,7 @@ python3 -u train_1p.py \
 --min-lr 0.0 \
 --dropout 0.1 \
 --weight-decay 0.0 \
---criterion label_smoothed_cross_entropy \
+--criterion cross_entropy \
 --label-smoothing 0.1 \
 --max-sentences 128\
 --max-tokens 102400\
@@ -97,7 +100,7 @@ python3 -u train_1p.py \
 wait
 sed -i "s|if i>100:break|if i>100:pass|g" train_1p.py
-sed -i "s|if m >=2:break|if i>100:pass|g" train_1p.py
+sed -i "s|if m >=2:break|if m>=2:pass|g" train_1p.py
 ##################获取训练数据################
 #训练结束时间,不需要修改
 end_time=$(date +%s)
diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh
index 3e0bab82c3c675a33bd37cc027ea979a74f29e19..ced96a4427c2ead5552a0d09c962361526edce93 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh
@@ -10,10 +10,12 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0
 export ASCEND_GLOBAL_LOG_LEVEL=3
 export PTCOPY_ENABLE=1
 export TASK_QUEUE_ENABLE=1
-export DYNAMIC_OP="ADD#MUL"
+#export DYNAMIC_OP="ADD#MUL"
+export COMBINED_ENABLE=1
+export SCALAR_TO_HOST_MEM=1
 
 python3 -u train_1p.py \
- ./data/dataset/wmt14_en_de_joined_dict/ \
+./data/dataset/wmt14_en_de_joined_dict/ \
 --device-id 7\
 --arch transformer_wmt_en_de \
 --share-all-embeddings \
@@ -29,7 +31,7 @@ python3 -u train_1p.py \
 --min-lr 0.0 \
 --dropout 0.1 \
 --weight-decay 0.0 \
---criterion label_smoothed_cross_entropy \
+--criterion cross_entropy \
 --label-smoothing 0.1 \
 --max-sentences 128\
 --max-tokens 102400\