diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py
index 66d8b9b4db69ebfeca6025682308fe3ee3f1c926..3ef0a377ad8ce2e78041e3182295d561ff54e796 100644
--- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py
+++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main.py
@@ -21,6 +21,8 @@ import time
 import crnn
 import utils
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn.parallel
 from torch.utils.data import DataLoader
 from apex import amp
diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py
index cc46a75bb84b7556ee1b36bc95ef35b66306b64c..c1cdcb02828323a2ec21055a41ab0b917877df4b 100644
--- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py
+++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_8p.py
@@ -21,6 +21,8 @@ import time
 import crnn
 import utils
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn.parallel
 from torch.utils.data import DataLoader
 from apex import amp
diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py
index 28d3c6e7767ceb8658485c7f904482f46eab2fe6..aaca74eeb689357429cab0fbc51781eacd29d40b 100644
--- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py
+++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/main_anycard.py
@@ -21,6 +21,8 @@ import time
 import crnn
 import utils
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn.parallel
 from torch.utils.data import DataLoader
 from apex import amp
diff --git a/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py b/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py
index 11c3a0450449f7b82eb1c96e223df0bf10b6e675..5c050033fafe23f2cdd13352a5f971d1b3aad435 100644
--- a/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py
+++ b/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch/references/classification/train.py
@@ -37,6 +37,8 @@ import time
 import sys
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.utils.data
 from torch import nn
 import torchvision
diff --git a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py
index 11ef66c3422bb1aa2833c3d55d8ad50ab95c090e..7d58914bd62d5fe7b12d76b3e870163a3644e983 100644
--- a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py
+++ b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/references/classification/train.py
@@ -37,6 +37,8 @@ import time
 import sys
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.utils.data
 from torch import nn
 import torchvision
diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py
index 79abe7f165038c41a9211278de3aaea587f12795..9e5619e7daf05e0656b5dcdae4e3b091e53505b2 100644
--- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py
+++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_1p_main.py
@@ -24,6 +24,8 @@ import time
 import warnings
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import apex
 import torch.nn as nn
 import torch.nn.parallel
diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py
index c87fda1332ba1fa939faef6f634235fd1fbb5536..efbe6b48a0b5c1325b660e996bbad0ec17410f0d 100644
--- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py
+++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/densenet121_8p_main.py
@@ -24,6 +24,8 @@ import warnings
 
 import apex
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py
index cf24ca704c3a8f237ff76a2b4e77ea2269a3d48c..8834d9d0f7999da348d58c04d64c53ebfca116be 100644
--- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py
+++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new.py
@@ -30,6 +30,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import sys
 import os
 import random
diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py
index 912ee61d190021c1b5bb14b49a502486b56a350e..6800b256e256d648468eb0463bf4aa5a331bebb7 100644
--- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py
+++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/fine_tune_new_8p.py
@@ -30,6 +30,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import sys
 import os
 import random
diff --git a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py
index db8d481c59f6065daf17816a99a44607058e3ff0..6c2dfafd8b3fde72e11652bc20fb01dfb5ac0ddd 100644
--- a/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py
+++ b/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch/models/dropout.py
@@ -36,6 +36,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import numpy as np
 
@@ -62,6 +64,8 @@ class DroupoutV2(nn.Module):
 
         if not self.checked:
             self.check_self(x)
-
-        x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p)
+        if torch.__version__ >= "1.8.1":
+            x = nn.functional.dropout(x, p=self.p)
+        else:
+            x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p)
         return x
diff --git a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py
index 7f551516c30d4c603120d263778e70e60a76ade9..81e07773cb0fb2749c29d8e0da8aa6994e2fcecc 100644
--- a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py
+++ b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/train.py
@@ -20,6 +20,8 @@ import argparse
 import time
 import os
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import yaml
 import torch.distributed as dist
 
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt
index 1d547c4650ac742f7f8a18cf0ae7e0ca90032224..d7a8589be6f463b04248db0e2219cd91a6fb157b 100644
--- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/requirements.txt
@@ -4,4 +4,4 @@ torchvision==0.2.2.post2
 fvcore
 pycocotools
 cloudpickle
-tensorboard
\ No newline at end of file
+tensorboard == 1.15.0
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt
index 0214cf1dd56be9f3b9a5c5649e39ba582425101c..79eb67fc7b2ae0fac545b089a4efb4729d5a90ee 100644
--- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt
+++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/requirements.txt
@@ -4,4 +4,4 @@ matplotlib
 pycocotools == 2.0.2
 tqdm
 pillow
-tensorboard == 1.14
+tensorboard == 1.15.0
diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py
index ed0add743b465a8a7bd6eec92292a18f64f443b4..ffcaef1e01426d260a6bc215b69d9a90fca8b865 100644
--- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py
+++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train.py
@@ -27,6 +27,8 @@ import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
 import torch.utils.data
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import yaml
 import apex
 from torch.nn.parallel import DistributedDataParallel as DDP
diff --git a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py
index 5e9afdb74628f21c44a38f9ba8253396f9f77688..892cc74f676a35b8be8c65fd8fa67b5b2c65a37c 100644
--- a/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py
+++ b/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch/train_8p.py
@@ -27,6 +27,8 @@ import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
 import torch.utils.data
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import yaml
 
 import apex
diff --git a/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py b/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py
index 2fcc80ed2790c6a68d1ff30dff1a67c4a635e81d..a4d26fd13e63c60ac639bf004ddbfd3ffc714b92 100644
--- a/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py
+++ b/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch/tools/train.py
@@ -21,6 +21,8 @@ import warnings
 
 import mmcv
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 from mmcv import Config, DictAction
 from mmcv.runner import get_dist_info, init_dist
 from mmcv.utils import get_git_hash
diff --git a/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py b/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py
index 8ad82cf02ded023e6a25dcdc7fe2f9b69045996e..7bb18b1902884e6b0480f5bea5ddff335b4d1415 100644
--- a/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py
+++ b/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch/utils/dropout.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import numpy as np
 
@@ -69,8 +71,10 @@ class DropoutV2(nn.Module):
 
         if not self.checked:
             self.check_self(x)
-
-        x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p)
+        if torch.__version__ >= "1.8.1":
+            x = nn.functional.dropout(x, p=self.p)
+        else:
+            x, mask, _ = torch.npu_dropoutV2(x, self.seed, p=self.p)
         return x
 
 
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index a5203e848d4513cb9969823dcd6115fc5c179f71..4f5204fe17439ac41046fd5c6d48099053f9caaa 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -29,6 +29,8 @@ from io import open
 
 import numpy as np
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                               TensorDataset)
 from torch.utils.data.distributed import DistributedSampler
@@ -80,11 +82,12 @@ class NpuFusedBertAdamV2(NpuFusedBertAdam):
                                                            group['warmup'])
             else:
                 lr_scheduled = group['lr']
-            combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(combined_param.data, exp_avg,
-                                                                                 exp_avg_sq, lr_scheduled, beta1, beta2,
+            combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(lr_scheduled, beta1, beta2,
                                                                                  group['e'], combined_grad.data,
                                                                                  group['max_grad_norm'], 0,
-                                                                                 group['weight_decay'])
+                                                                                 group['weight_decay'],
+                                                                                 out=(combined_param.data, exp_avg,
+                                                                                 exp_avg_sq))
             combined_param_state['step'] += 1
 
 
diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py b/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py
index eb558dc9fc7f77571f3db09c73eee4e2f802f3ba..cbbbae4cb0e672cdda419ae7a9a5d8cb3678010b 100644
--- a/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py
+++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/decoder.py
@@ -1,4 +1,6 @@
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 
 
@@ -20,7 +22,10 @@ class Decoder(nn.Module):
         input = input.unsqueeze(0)
         embedded = self.embedding(input)
         if self.training:
-            embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                embedded = nn.functional.dropout(embedded, p=self.prob)
+            else:
+                embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob)
 
         emb_con = torch.cat((embedded, context), dim=2)
         output, hidden = self.rnn(emb_con, hidden)
diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py b/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py
index f9e4f40b8ac40f0ab88059b9a8ee7e9513e0b5cf..b25b078dd0c674af196b311d18df76a258438f0e 100644
--- a/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py
+++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/encoder.py
@@ -1,4 +1,6 @@
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 
 
@@ -18,7 +20,10 @@ class Encoder(nn.Module):
         embedded = self.embedding(src)
 
         if self.training:
-            embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                embedded = nn.functional.dropout(embedded, p=self.prob)
+            else:
+                embedded, _, _ = torch.npu_dropoutV2(embedded, self.seed, p=self.prob)
 
         outputs, hidden = self.rnn(embedded)  # no cell state!
 
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py
index 1d3614e68c5df535bab9e3ff3b685bfcea017ddf..9db696766ae52fbc533af33725ba6fa2c50b2240 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/models/model_ctc.py
@@ -17,6 +17,8 @@
 
 import math
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import editdistance as ed
 from collections import OrderedDict
@@ -56,7 +58,10 @@ class BatchRNN(nn.Module):
         x = torch.cat((x_post, x_reverse), 2)
 
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.prob)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
         return x
 
 
@@ -93,7 +98,10 @@ class LayerCNN(nn.Module):
         if self.pooling is not None:
             x = self.pooling(x)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.prob)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
 
         return x
 
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py
index 9a8c28cd7646f63402669d0202f365a315405e69..40845c3a43c3feaa75516e2993bf132e8c942b1a 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/test_ctc.py
@@ -19,6 +19,8 @@ import os
 import time
 import sys
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import yaml
 import argparse
 import torch.nn as nn
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py
index f3471226dc3a0f5a4e12275c7936e982cde9bacb..1aab27976e58f2b645e711e290d8125f76760364 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/1p/steps/train_ctc.py
@@ -26,6 +26,8 @@ import argparse
 import numpy as np
 import random
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import torch.backends.cudnn as cudnn
 import apex
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py
index 0641c341836e9558f3069033aa8db1441472fbc9..2f650e997cc09f9e4753a87a499926dfa8369f3c 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/models/model_ctc.py
@@ -17,6 +17,8 @@
 
 import math
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import editdistance as ed
 from collections import OrderedDict
@@ -56,7 +58,10 @@ class BatchRNN(nn.Module):
         x = torch.cat((x_post, x_reverse), 2)
 
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.prob)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
         
         return x
 
@@ -94,7 +99,10 @@ class LayerCNN(nn.Module):
         if self.pooling is not None:
             x = self.pooling(x)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.prob)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.prob)
 
         return x
 
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py
index 070d7055a23f7c9a9984a3a6c192197200f1a8dc..9d483253aece9d37704a3c5ba6dc5d715041dbf5 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/test_ctc.py
@@ -19,6 +19,8 @@ import os
 import time
 import sys
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import yaml
 import argparse
 import torch.nn as nn
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py
index 6662ed15e236e92a5664a346630fde7e3e66a48b..4c0d6ace0a14ff6eba3cf36bb6986a26dd89271e 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/NPU/8p/steps/train_ctc.py
@@ -25,6 +25,8 @@ import yaml
 import argparse
 import random
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn as nn
 import torch.backends.cudnn as cudnn
 from torch.utils.tensorboard import SummaryWriter
diff --git a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt
index cdbbb913ef3b46275ecbd0bc8780c2fede9019a3..5aa5ab416ed3dac3326a488258cb89a165ba73d0 100644
--- a/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt
+++ b/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch/requirements.txt
@@ -1,4 +1,4 @@
 torchvision
 kaldiio==2.17.2
 editdistance==0.5.3
-tensorboard==1.14.0
\ No newline at end of file
+tensorboard==1.15.0
\ No newline at end of file
diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py
index 399569939354354306b37b693f7319aa96cbb8c5..0872a5f439fdf575cc6c4eb4070e41665bb0c7c6 100644
--- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py
+++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/models/transformer.py
@@ -24,6 +24,8 @@
 import math
 import torch
 import torch.nn as nn
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 import torch.nn.functional as F
 from torch import Tensor
 from typing import Optional, Dict
@@ -185,7 +187,10 @@ class TransformerEncoder(nn.Module):
         if self.embed_positions is not None:
             x += self.embed_positions(src_tokens)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
 
         # B:batch size ; T: seq length ; C: embedding dim 512
         # B x T x C -> T x B x C
@@ -267,7 +272,10 @@ class TransformerDecoder(IncrementalDecoder):
         if positions is not None:
             x += positions
         if self.training:
-            x,_,_ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x,_,_ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
 
         # B x T x C -> T x B x C
         x = x.transpose(0, 1)
@@ -320,17 +328,26 @@ class TransformerEncoderLayer(nn.Module):
                               need_weights=False,
                               static_kv=False)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.ln1(x)
 
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.relu_dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
         x = self.fc2(x)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p =self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.ln2(x)
         return x
@@ -384,7 +401,10 @@ class TransformerDecoderLayer(nn.Module):
         )
 
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.self_attn_layer_norm(x)
 
@@ -403,7 +423,10 @@ class TransformerDecoderLayer(nn.Module):
                 need_weights=(not self.training and self.need_attn),
             )
             if self.training:
-                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+                if torch.__version__ >= "1.8.1":
+                    x = nn.functional.dropout(x, p=self.dropout)
+                else:
+                    x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
             x = residual + x
 
             x = self.encoder_attn_layer_norm(x)
@@ -411,10 +434,16 @@ class TransformerDecoderLayer(nn.Module):
         residual = x
         x = F.threshold(self.fc1(x), 0.0, 0.0)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.relu_dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.relu_dropout)
         x = self.fc2(x)
         if self.training:
-            x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                x = nn.functional.dropout(x, p=self.dropout)
+            else:
+                x, _, _ = torch.npu_dropoutV2(x, self.seed, p=self.dropout)
         x = residual + x
         x = self.layer_norm(x)
         return x, attn
diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py
index ba5e353d0e44ca13cc2374477533f2ea1ae94a23..d640b25a44880d6ef38227081ea8b2d18b1c4d58 100644
--- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py
+++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py
@@ -24,6 +24,8 @@
 
 from typing import Dict, Optional
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 from torch import nn, Tensor
 from torch.nn import Parameter
 import torch.nn.functional as F
@@ -294,7 +296,10 @@ class MultiheadAttention(nn.Module):
 
         attn_weights = F.softmax(attn_weights, dim=-1)
         if self.training:
-            attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout)
+            if torch.__version__ >= "1.8.1":
+                attn_weights = nn.functional.dropout(attn_weights, p=self.dropout)
+            else:
+                attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout)
 
         attn = strided_bmm2(attn_weights, v)
         assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py
index 30b8685b000288dce957e1d1fb24212c6f04f37b..0748b7bdb35a2027ff2ab099376a12c2fc570ebc 100644
--- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py
+++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/cpex/amp/_process_optimizer.py
@@ -19,6 +19,8 @@ from apex.fp16_utils import master_params_to_model_params
 from apex.multi_tensor_apply import multi_tensor_applier
 from ._amp_state import maybe_print
 import torch
+if torch.__version__ >= "1.8.1":
+    import torch_npu
 from apex.optimizers import FusedSGD, NpuFusedAdam, NpuFusedSGD, NpuFusedAdadelta
 from change_data_ptr import change_data_ptr
 from apex.contrib.combine_tensors import combine_npu
@@ -225,8 +227,8 @@ def combined_init_with_master_weights(stash):
         stash.combined_tensor_fp16, stash.fp16_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp16_params)
         for model_grad, master in zip(stash.fp16_param_grad_list, stash.all_fp32_from_fp16_params):
             master.grad = torch.empty_like(model_grad.to(torch.float))
-            master.data = master.data.npu_format_cast(model_grad.storage().npu_format())
-
+            master.data = master.data.npu_format_cast(torch.get_npu_format(model_grad))
+            
         stash.combined_tensor_fp32_from_fp16, stash.fp32_from_fp16_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp32_from_fp16_params)
         stash.combined_tensor_fp32, stash.fp32_param_grad_list = get_grad_combined_tensor_from_param(stash.all_fp32_from_fp32_params)
         # please do not change the order of tensor in this list.