From 8a687c27f5a60fefac20405a60cea3c98bf0f9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:21:09 +0000 Subject: [PATCH 1/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py --- .../modules/multihead_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py index 9c2484f701..f2c7bd03e8 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py @@ -294,7 +294,7 @@ class MultiheadAttention(nn.Module): attn_weights = F.softmax(attn_weights, dim=-1) if self.training: - attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) + attn_weights = F.dropout(attn_weights, p=self.dropout, train=self.training) attn = strided_bmm2(attn_weights, v) assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] -- Gitee From 8c5c095a97ce892eb017788d6d982adc072f9790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:25:33 +0000 Subject: [PATCH 2/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh --- .../dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh index 3e0bab82c3..ced96a4427 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh @@ -10,10 +10,12 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0 export ASCEND_GLOBAL_LOG_LEVEL=3 export PTCOPY_ENABLE=1 export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" +#export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1
+export SCALAR_TO_HOST_MEM=1 python3 -u train_1p.py \ - ./data/dataset/wmt14_en_de_joined_dict/ \ +./data/dataset/wmt14_en_de_joined_dict/ \ --device-id 7\ --arch transformer_wmt_en_de \ --share-all-embeddings \ @@ -29,7 +31,7 @@ python3 -u train_1p.py \ --min-lr 0.0 \ --dropout 0.1 \ --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ + --criterion cross_entropy \ --label-smoothing 0.1 \ --max-sentences 128\ --max-tokens 102400\ -- Gitee From c51efa8972d6cc28ac523afc80008f7019daeb99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:29:35 +0000 Subject: [PATCH 3/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh --- .../test/train_performance_1p.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh index 48e7fb3af7..6761832491 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh @@ -62,8 +62,11 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0 export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 export PTCOPY_ENABLE=1 export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" +#export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1 +export SCALAR_TO_HOST_MEM=1 start_time=$(date +%s) + python3 -u train_1p.py \ $data_path \ --device-id ${ASCEND_DEVICE_ID}\ --arch transformer_wmt_en_de \ --share-all-embeddings \ @@ -81,7 +84,7 @@ python3 -u train_1p.py \ --min-lr 0.0 \ --dropout 0.1 \ --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ + --criterion cross_entropy \ --label-smoothing 0.1 \ --max-sentences 128\ --max-tokens 102400\ @@ -97,7 +100,7 @@ python3 -u train_1p.py \ wait sed -i "s|if i>100:break|if i>100:pass|g" train_1p.py -sed -i "s|if m >=2:break|if m>=2:pass|g" train_1p.py 
##################获取训练数据################ #训练结束时间,不需要修改 end_time=$(date +%s) -- Gitee From 862c2e004951cf9da16b90f2f777758677ad5b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 02:54:57 +0000 Subject: [PATCH 4/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py. --- .../modules/multihead_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py index f2c7bd03e8..ac8db9f8d0 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py @@ -294,7 +294,7 @@ class MultiheadAttention(nn.Module): attn_weights = F.softmax(attn_weights, dim=-1) if self.training: - attn_weights = F.dropout(attn_weights, p=self.dropout, train=self.training) + attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) attn = strided_bmm2(attn_weights, v) assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] -- Gitee