From 8a687c27f5a60fefac20405a60cea3c98bf0f9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:21:09 +0000 Subject: [PATCH 1/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py --- .../modules/multihead_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py index 9c2484f701..f2c7bd03e8 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py @@ -294,7 +294,7 @@ class MultiheadAttention(nn.Module): attn_weights = F.softmax(attn_weights, dim=-1) if self.training: - attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) + attn_weights = F.dropout(attn_weights, p=self.dropout, train=self.training) attn = strided_bmm2(attn_weights, v) assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] -- Gitee From 8c5c095a97ce892eb017788d6d982adc072f9790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:25:33 +0000 Subject: [PATCH 2/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh --- .../dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh index 3e0bab82c3..ced96a4427 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_1p.sh @@ -10,10 +10,12 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0 export ASCEND_GLOBAL_LOG_LEVEL=3 export PTCOPY_ENABLE=1 export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" +#export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1
+export SCALAR_TO_HOST_MEM=1 python3 -u train_1p.py \ - ./data/dataset/wmt14_en_de_joined_dict/ \ +./data/dataset/wmt14_en_de_joined_dict/ \ --device-id 7\ --arch transformer_wmt_en_de \ --share-all-embeddings \ @@ -29,7 +31,7 @@ python3 -u train_1p.py \ --min-lr 0.0 \ --dropout 0.1 \ --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ + --criterion cross_entropy \ --label-smoothing 0.1 \ --max-sentences 128\ --max-tokens 102400\ -- Gitee From c51efa8972d6cc28ac523afc80008f7019daeb99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 01:29:35 +0000 Subject: [PATCH 3/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh --- .../test/train_performance_1p.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh index 48e7fb3af7..6761832491 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh @@ -62,8 +62,11 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=0 export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 export PTCOPY_ENABLE=1 export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" +#export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1 +export SCALAR_TO_HOST_MEM=1 start_time=$(date +%s) + python3 -u train_1p.py \ $data_path \ --device-id ${ASCEND_DEVICE_ID}\ --arch transformer_wmt_en_de \ --share-all-embeddings \ @@ -81,7 +84,7 @@ python3 -u train_1p.py \ --min-lr 0.0 \ --dropout 0.1 \ --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ + --criterion cross_entropy \ --label-smoothing 0.1 \ --max-sentences 128\ --max-tokens 102400\ @@ -97,7 +100,7 @@ python3 -u train_1p.py \ wait sed -i "s|if i>100:break|if i>100:pass|g" train_1p.py -sed -i "s|if m >=2:break|if m>=2:pass|g" train_1p.py 
##################获取训练数据################ #训练结束时间,不需要修改 end_time=$(date +%s) -- Gitee From 862c2e004951cf9da16b90f2f777758677ad5b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=92=8C=E5=85=89?= Date: Sat, 2 Apr 2022 02:54:57 +0000 Subject: [PATCH 4/4] update PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py. --- .../modules/multihead_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py index f2c7bd03e8..ac8db9f8d0 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modules/multihead_attention.py @@ -294,7 +294,7 @@ class MultiheadAttention(nn.Module): attn_weights = F.softmax(attn_weights, dim=-1) if self.training: - attn_weights = F.dropout(attn_weights, p=self.dropout, train=self.training) + attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) attn = strided_bmm2(attn_weights, v) assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] -- Gitee