diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py index 66d8b9b4db69ebfeca6025682308fe3ee3f1c926..3ef0a377ad8ce2e78041e3182295d561ff54e796 100644 --- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py +++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py @@ -21,6 +21,8 @@ import time import crnn import utils import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn.parallel from torch.utils.data import DataLoader from apex import amp diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py index cc46a75bb84b7556ee1b36bc95ef35b66306b64c..c1cdcb02828323a2ec21055a41ab0b917877df4b 100644 --- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py +++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py @@ -21,6 +21,8 @@ import time import crnn import utils import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn.parallel from torch.utils.data import DataLoader from apex import amp diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py index 28d3c6e7767ceb8658485c7f904482f46eab2fe6..aaca74eeb689357429cab0fbc51781eacd29d40b 100644 --- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py +++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py @@ -21,6 +21,8 @@ import time import crnn import utils import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn.parallel from torch.utils.data import DataLoader from apex import amp diff --git a/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py b/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py index 11c3a0450449f7b82eb1c96e223df0bf10b6e675..5c050033fafe23f2cdd13352a5f971d1b3aad435 100644 --- a/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py +++ b/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py @@ -37,6 +37,8 @@ import time import sys import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.utils.data from torch import nn import torchvision diff --git a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py index 11ef66c3422bb1aa2833c3d55d8ad50ab95c090e..7d58914bd62d5fe7b12d76b3e870163a3644e983 100644 --- a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py +++ b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py @@ -37,6 +37,8 @@ import time import sys import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.utils.data from torch import nn import torchvision diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py index 79abe7f165038c41a9211278de3aaea587f12795..9e5619e7daf05e0656b5dcdae4e3b091e53505b2 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py @@ -24,6 +24,8 @@ import time import warnings import torch +if torch.__version__ >= "1.8.1": + import torch_npu import apex import torch.nn as nn import torch.nn.parallel diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py index c87fda1332ba1fa939faef6f634235fd1fbb5536..efbe6b48a0b5c1325b660e996bbad0ec17410f0d 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py @@ -24,6 +24,8 @@ import warnings import apex import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import torch.nn.parallel import torch.backends.cudnn as cudnn diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py index cf24ca704c3a8f237ff76a2b4e77ea2269a3d48c..8834d9d0f7999da348d58c04d64c53ebfca116be 100644 --- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py +++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py @@ -30,6 +30,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch +if torch.__version__ >= "1.8.1": + import torch_npu import sys import os import random diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py index 912ee61d190021c1b5bb14b49a502486b56a350e..6800b256e256d648468eb0463bf4aa5a331bebb7 100644 --- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py +++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py @@ -30,6 +30,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch +if torch.__version__ >= "1.8.1": + import torch_npu import sys import os import random diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py index db8d481c59f6065daf17816a99a44607058e3ff0..6c2dfafd8b3fde72e11652bc20fb01dfb5ac0ddd 100644 --- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py +++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py @@ -36,6 +36,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import numpy as np @@ -62,6 +64,8 @@ class DroupoutV2(nn.Module): if not self.checked: self.check_self(x) - - x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.p) + else: + x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p) return x diff --git a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py index 7f551516c30d4c603120d263778e70e60a76ade9..81e07773cb0fb2749c29d8e0da8aa6994e2fcecc 100644 --- a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py +++ b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py @@ -20,6 +20,8 @@ import argparse import time import os import torch +if torch.__version__ >= "1.8.1": + import torch_npu import yaml import torch.distributed as dist diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt index 1d547c4650ac742f7f8a18cf0ae7e0ca90032224..d7a8589be6f463b04248db0e2219cd91a6fb157b 100644 --- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt +++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt @@ -4,4 +4,4 @@ torchvision==0.2.2.post2 fvcore pycocotools cloudpickle -tensorboard \ No newline at end of file +tensorboard == 1.15.0 \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt index 0214cf1dd56be9f3b9a5c5649e39ba582425101c..79eb67fc7b2ae0fac545b089a4efb4729d5a90ee 100644 --- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt +++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt @@ -4,4 +4,4 @@ matplotlib pycocotools == 2.0.2 tqdm pillow -tensorboard == 1.14 +tensorboard == 1.15.0 diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py index ed0add743b465a8a7bd6eec92292a18f64f443b4..ffcaef1e01426d260a6bc215b69d9a90fca8b865 100644 --- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py +++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py @@ -27,6 +27,8 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler import torch.utils.data +if torch.__version__ >= "1.8.1": + import torch_npu import yaml import apex from torch.nn.parallel import DistributedDataParallel as DDP diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py index 5e9afdb74628f21c44a38f9ba8253396f9f77688..892cc74f676a35b8be8c65fd8fa67b5b2c65a37c 100644 --- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py +++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py @@ -27,6 +27,8 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler import torch.utils.data +if torch.__version__ >= "1.8.1": + import torch_npu import yaml import apex diff --git a/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py b/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py index 2fcc80ed2790c6a68d1ff30dff1a67c4a635e81d..a4d26fd13e63c60ac639bf004ddbfd3ffc714b92 100644 --- a/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py +++ b/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py @@ -21,6 +21,8 @@ import warnings import mmcv import torch +if torch.__version__ >= "1.8.1": + import torch_npu from mmcv import Config, DictAction from mmcv.runner import get_dist_info, init_dist from mmcv.utils import get_git_hash diff --git a/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py b/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py index 8ad82cf02ded023e6a25dcdc7fe2f9b69045996e..7bb18b1902884e6b0480f5bea5ddff335b4d1415 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py +++ b/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py @@ -13,6 +13,8 @@ # limitations under the License. import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import numpy as np @@ -69,8 +71,10 @@ class DropoutV2(nn.Module): if not self.checked: self.check_self(x) - - x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.p) + else: + x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p) return x diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index a5203e848d4513cb9969823dcd6115fc5c179f71..4f5204fe17439ac41046fd5c6d48099053f9caaa 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -29,6 +29,8 @@ from io import open import numpy as np import torch +if torch.__version__ >= "1.8.1": + import torch_npu from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, TensorDataset) from torch.utils.data.distributed import DistributedSampler @@ -80,11 +82,12 @@ class NpuFusedBertAdamV2(NpuFusedBertAdam): group['warmup']) else: lr_scheduled = group['lr'] - combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(combined_param.data, exp_avg, - exp_avg_sq, lr_scheduled, beta1, beta2, + combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(lr_scheduled, beta1, beta2, group['e'], combined_grad.data, group['max_grad_norm'], 0, - group['weight_decay']) + group['weight_decay'], + out=(combined_param.data, exp_avg, + exp_avg_sq)) combined_param_state['step'] += 1 diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py b/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py index eb558dc9fc7f77571f3db09c73eee4e2f802f3ba..cbbbae4cb0e672cdda419ae7a9a5d8cb3678010b 100644 --- a/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py +++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py @@ -1,4 +1,6 @@ import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn @@ -20,7 +22,10 @@ class Decoder(nn.Module): input = input.unsqueeze(0) embedded = self.embedding(input) if self.training: - embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + embedded = nn.functional.dropout(embedded, p=self.prob) + else: + embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob) emb_con = torch.cat((embedded, context), dim=2) output, hidden = self.rnn(emb_con, hidden) diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py b/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py index f9e4f40b8ac40f0ab88059b9a8ee7e9513e0b5cf..b25b078dd0c674af196b311d18df76a258438f0e 100644 --- a/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py +++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py @@ -1,4 +1,6 @@ import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn @@ -18,7 +20,10 @@ class Encoder(nn.Module): embedded = self.embedding(src) if self.training: - embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + embedded = nn.functional.dropout(embedded, p=self.prob) + else: + embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob) outputs, hidden = self.rnn(embedded) # no cell state! diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py index 1d3614e68c5df535bab9e3ff3b685bfcea017ddf..9db696766ae52fbc533af33725ba6fa2c50b2240 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py @@ -17,6 +17,8 @@ import math import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import editdistance as ed from collections import OrderedDict @@ -56,7 +58,10 @@ class BatchRNN(nn.Module): x = torch.cat((x_post, x_reverse), 2) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.prob) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) return x @@ -93,7 +98,10 @@ class LayerCNN(nn.Module): if self.pooling is not None: x = self.pooling(x) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.prob) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) return x diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py index 9a8c28cd7646f63402669d0202f365a315405e69..40845c3a43c3feaa75516e2993bf132e8c942b1a 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py @@ -19,6 +19,8 @@ import os import time import sys import torch +if torch.__version__ >= "1.8.1": + import torch_npu import yaml import argparse import torch.nn as nn diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py index f3471226dc3a0f5a4e12275c7936e982cde9bacb..1aab27976e58f2b645e711e290d8125f76760364 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py @@ -26,6 +26,8 @@ import argparse import numpy as np import random import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import torch.backends.cudnn as cudnn import apex diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py index 0641c341836e9558f3069033aa8db1441472fbc9..2f650e997cc09f9e4753a87a499926dfa8369f3c 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py @@ -17,6 +17,8 @@ import math import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import editdistance as ed from collections import OrderedDict @@ -56,7 +58,10 @@ class BatchRNN(nn.Module): x = torch.cat((x_post, x_reverse), 2) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.prob) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) return x @@ -94,7 +99,10 @@ class LayerCNN(nn.Module): if self.pooling is not None: x = self.pooling(x) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.prob) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob) return x diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py index 070d7055a23f7c9a9984a3a6c192197200f1a8dc..9d483253aece9d37704a3c5ba6dc5d715041dbf5 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py @@ -19,6 +19,8 @@ import os import time import sys import torch +if torch.__version__ >= "1.8.1": + import torch_npu import yaml import argparse import torch.nn as nn diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py index 6662ed15e236e92a5664a346630fde7e3e66a48b..4c0d6ace0a14ff6eba3cf36bb6986a26dd89271e 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py @@ -25,6 +25,8 @@ import yaml import argparse import random import torch +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn as nn import torch.backends.cudnn as cudnn from torch.utils.tensorboard import SummaryWriter diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt index cdbbb913ef3b46275ecbd0bc8780c2fede9019a3..5aa5ab416ed3dac3326a488258cb89a165ba73d0 100644 --- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt +++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt @@ -1,4 +1,4 @@ torchvision kaldiio==2.17.2 editdistance==0.5.3 -tensorboard==1.14.0 \ No newline at end of file +tensorboard==1.15.0 \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py index 399569939354354306b37b693f7319aa96cbb8c5..0872a5f439fdf575cc6c4eb4070e41665bb0c7c6 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py @@ -24,6 +24,8 @@ import math import torch import torch.nn as nn +if torch.__version__ >= "1.8.1": + import torch_npu import torch.nn.functional as F from torch import Tensor from typing import Optional, Dict @@ -185,7 +187,10 @@ class TransformerEncoder(nn.Module): if self.embed_positions is not None: x += self.embed_positions(src_tokens) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) # B:batch size ; T: seq length ; C: embedding dim 512 # B x T x C -> T x B x C @@ -267,7 +272,10 @@ class TransformerDecoder(IncrementalDecoder): if positions is not None: x += positions if self.training: - x,_,_ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x,_,_ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) # B x T x C -> T x B x C x = x.transpose(0, 1) @@ -320,17 +328,26 @@ class TransformerEncoderLayer(nn.Module): need_weights=False, static_kv=False) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) x = residual + x x = self.ln1(x) residual = x x = F.threshold(self.fc1(x), 0.0, 0.0) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.relu_dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout) x = self.fc2(x) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p =self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) x = residual + x x = self.ln2(x) return x @@ -384,7 +401,10 @@ class TransformerDecoderLayer(nn.Module): ) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) x = residual + x x = self.self_attn_layer_norm(x) @@ -403,7 +423,10 @@ class TransformerDecoderLayer(nn.Module): need_weights=(not self.training and self.need_attn), ) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) x = residual + x x = self.encoder_attn_layer_norm(x) @@ -411,10 +434,16 @@ class TransformerDecoderLayer(nn.Module): residual = x x = F.threshold(self.fc1(x), 0.0, 0.0) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.relu_dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout) x = self.fc2(x) if self.training: - x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + x = nn.functional.dropout(x, p=self.dropout) + else: + x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout) x = residual + x x = self.layer_norm(x) return x, attn diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py index ba5e353d0e44ca13cc2374477533f2ea1ae94a23..d640b25a44880d6ef38227081ea8b2d18b1c4d58 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py @@ -24,6 +24,8 @@ from typing import Dict, Optional import torch +if torch.__version__ >= "1.8.1": + import torch_npu from torch import nn, Tensor from torch.nn import Parameter import torch.nn.functional as F @@ -294,7 +296,10 @@ class MultiheadAttention(nn.Module): attn_weights = F.softmax(attn_weights, dim=-1) if self.training: - attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) + if torch.__version__ >= "1.8.1": + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout) + else: + attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) attn = strided_bmm2(attn_weights, v) assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py index 30b8685b000288dce957e1d1fb24212c6f04f37b..0748b7bdb35a2027ff2ab099376a12c2fc570ebc 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py @@ -19,6 +19,8 @@ from apex.fp16_utils import master_params_to_model_params from apex.multi_tensor_apply import multi_tensor_applier from ._amp_state import maybe_print import torch +if torch.__version__ >= "1.8.1": + import torch_npu from apex.optimizers import FusedSGD, NpuFusedAdam, NpuFusedSGD, NpuFusedAdadelta from change_data_ptr import change_data_ptr from apex.contrib.combine_tensors import combine_npu @@ -225,8 +227,8 @@ def combined_init_with_master_weights(stash): stash.combined_tensor_fp16, stash.fp16_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp16_params) for model_grad, master in zip(stash.fp16_param_grad_list, stash.all_fp32_from_fp16_params): master.grad = torch.empty_like(model_grad.to(torch.float)) - master.data = master.data.npu_format_cast(model_grad.storage().npu_format()) - + master.data = master.data.npu_format_cast(torch.get_npu_format(model_grad)) + stash.combined_tensor_fp32_from_fp16, stash.fp32_from_fp16_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp32_from_fp16_params) stash.combined_tensor_fp32, stash.fp32_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp32_from_fp32_params) # please do not change the order of tensor in this list.