diff --git a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py index 7b5057c7d3e1b9de64986c32de7fba53f069175e..edb433043c144bf09607015e320b6f8547741d74 100644 --- a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py +++ b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/main.py @@ -236,14 +236,14 @@ def main_worker(gpu, ngpus_per_node, args): model = vgg16() model = model.to(loc) - optimizer = torch.optim.SGD(model.parameters(), args.lr, + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = nn.CrossEntropyLoss().to(loc) if args.amp: - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value) + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value,combine_grad=True) #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) # optionally resume from a checkpoint diff --git a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py index b28b64969f98d5635ce816c237baa2d53fb45d7c..2f87dff860b0911dcfb5e4ab5d034ad936e7c0b0 100644 --- a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py +++ b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/vgg.py @@ -55,13 +55,11 @@ class VGG(nn.Module): x = self.fc1(x) x = self.relu(x) if self.training: - x = x.cpu() - x = self.drop(x).npu() + x = self.drop(x) x = self.fc2(x) x = self.relu(x) if self.training: - x = x.cpu() - x = self.drop(x).npu() + x = self.drop(x) x = self.fc3(x) return x diff --git a/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch/train.py b/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch/train.py index 738b696b2a89c91255b6d3a09f3c854f8e8d91f9..0ed86f190608889e0eef841f19210123786e4a3a 100644 --- a/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch/train.py +++ b/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch/train.py @@ -60,6 +60,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, pri header = 'Epoch: [{}]'.format(epoch) cnt = 0 for image, target in metric_logger.log_every(data_loader, print_freq, header): + # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True): start_time = time.time() image, target = image.to(device), target.to(torch.int).to(device) output = model(image) @@ -82,6 +83,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, pri cnt = cnt + 1 if args.max_steps and cnt > args.max_steps: + # if cnt > 10: break @@ -197,7 +199,7 @@ def main(args): data_loader = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers, pin_memory=True) + sampler=train_sampler, num_workers=args.workers, pin_memory=False) data_loader_test = torch.utils.data.DataLoader( dataset_test, batch_size=args.batch_size, diff --git a/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/image_classification/training.py b/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/image_classification/training.py index 86db6ff76fe5454f794e41c545e251374908a458..ab4fe76d668fd5b373f88b811f041de5107f97b4 100644 --- a/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/image_classification/training.py +++ b/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/image_classification/training.py @@ -274,6 +274,7 @@ def get_train_step( model_and_loss, optimizer, fp16, use_amp=False, batch_size_multiplier=1 ): def _step(input, target, optimizer_step=True): + # with torch.npu.profile(profiler_result_path="./results", use_e2e_profiler=True): input_var = Variable(input) target_var = Variable(target) loss, output = model_and_loss(input_var, target_var) diff --git a/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch/train.py b/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch/train.py index 4e9f258d821b649c2d8862a155e08857fad000fa..2776b368b22652a4c5436dd5696e12a084ff173c 100644 --- a/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch/train.py +++ b/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch/train.py @@ -60,9 +60,10 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, pri header = 'Epoch: [{}]'.format(epoch) cnt = 0 for image, target in metric_logger.log_every(data_loader, print_freq, header): + # with torch.autograd.profiler.profile(use_npu=True) as prof: start_time = time.time() #image, target = image.to(device), target.to(device) - image, target = image.to(device), target.to(torch.int).to(device) + image, target = image.to(device,non_blocking=True), target.to(torch.int).to(device,non_blocking=True) output = model(image) loss = criterion(output, target) @@ -73,6 +74,13 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, pri else: loss.backward() optimizer.step() + # cnt = str(cnt) + # prof.export_chrome_trace("./profiler_"+cnt+"_npu.json") + # cnt = int(cnt) + # print('successfully_training') + # if cnt==10: + # sys.exit() + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) batch_size = image.shape[0] @@ -220,6 +228,7 @@ def main(args): if args.apex: model, optimizer = amp.initialize(model, optimizer, opt_level=args.apex_opt_level, + loss_scale=128, combine_grad=True )