From 8c3be1de99422fd90b73a316d2c0d9c9eb9199d9 Mon Sep 17 00:00:00 2001
From: x-ting
Date: Fri, 25 Nov 2022 18:01:38 +0800
Subject: [PATCH] =?UTF-8?q?RetinaNet=E9=80=82=E9=85=8DPT-1.8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../contrib/cv/detection/RetinaNet/README.md | 161 ++++++++++++++++++
 .../cv/detection/RetinaNet/README_NPU.md | 40 -----
 .../RetinaNet/detectron2/engine/defaults.py | 1 +
 .../RetinaNet/detectron2/utils/events.py | 10 +-
 .../cv/detection/RetinaNet/requirements.txt | 2 +-
 .../cv/detection/RetinaNet/test/env_npu.sh | 47 +++--
 .../detection/RetinaNet/test/train_eval_8p.sh | 2 +-
 .../detection/RetinaNet/test/train_full_1p.sh | 3 +-
 .../detection/RetinaNet/test/train_full_8p.sh | 6 +-
 .../RetinaNet/test/train_performance_1p.sh | 10 +-
 .../RetinaNet/test/train_performance_8p.sh | 13 +-
 .../cv/detection/RetinaNet/tools/train_net.py | 8 +
 12 files changed, 216 insertions(+), 87 deletions(-)
 create mode 100644 PyTorch/contrib/cv/detection/RetinaNet/README.md
 delete mode 100644 PyTorch/contrib/cv/detection/RetinaNet/README_NPU.md

diff --git a/PyTorch/contrib/cv/detection/RetinaNet/README.md b/PyTorch/contrib/cv/detection/RetinaNet/README.md
new file mode 100644
index 0000000000..379eaf8e2f
--- /dev/null
+++ b/PyTorch/contrib/cv/detection/RetinaNet/README.md
@@ -0,0 +1,161 @@
+# RetinaNet
+
+- [Overview](概述.md)
+- [Preparing the Training Environment](准备训练环境.md)
+- [Starting Training](开始训练.md)
+- [Training Results](训练结果展示.md)
+- [Release Notes](版本说明.md)
+
+
+# Overview
+
+## Description
+
+To address the class-imbalance problem in one-stage detectors, the authors propose a new loss function, Focal Loss, obtained by modifying the standard cross-entropy loss. By down-weighting easily classified examples, it makes the model focus on the sparse set of hard examples during training and prevents the large number of easy negatives from overwhelming the detector. To demonstrate the effectiveness of focal loss, the authors designed a dense detector, RetinaNet, and trained it with focal loss. Experiments show that RetinaNet matches the speed of one-stage detectors while surpassing the accuracy of existing two-stage detectors.
+
+
+- Reference implementation:
+
+  ```
+  url=https://github.com/facebookresearch/detectron2
+  commit_id=96c752ce821a3340e27edd51c28a00665dd32a30
+  ```
+
+- Implementation adapted to Ascend AI Processors:
+
+  ```
+  url=https://gitee.com/ascend/ModelZoo-PyTorch.git
+  code_path=PyTorch/contrib/cv/detection
+  ```
+
+- Obtain the code via Git as follows:
+
+  ```
+  git clone {url}        # clone the repository
+  cd {code_path}         # switch to the model code path; skip this if the repository only contains this model
+  ```
+
+- Alternatively, click "立即下载" (Download Now) to download the source package.
+
+
+# Preparing the Training Environment
+
+## Prepare the Environment
+
+- The firmware and driver, CANN, and PyTorch versions supported by this model are listed in the table below.
+
+  **Table 1** Version compatibility
+
+  | Component         | Version |
+  | ----------------- | ------- |
+  | Hardware          | [1.0.16](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+  | Firmware & driver | [5.1.RC2](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+  | CANN              | [5.1.RC2](https://www.hiascend.com/software/cann/commercial?version=5.1.RC2) |
+  | PyTorch           | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/) |
+
+- Environment setup guide.
+
+  See [Preparing the PyTorch Framework Training Environment](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes).
+
+- Install the dependencies.
+
+  ```
+  pip3.7 install -r requirements.txt
+  ```
+  A recent Pillow version is recommended. If the matching torchvision version cannot be installed directly, build it from source: https://github.com/pytorch/vision
+  Suggested versions: Pillow 9.1.0 and torchvision 0.6.0.
+
+
+## Prepare the Dataset
+
+  Obtain the COCO dataset yourself; it consists of the images and the annotations files, both available from the [COCO website](https://cocodataset.org/#download). The labels are also required and can be downloaded from [Google Drive](https://drive.google.com/uc?export=download&id=1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L). Extract the dataset to any directory on the server (the source package root, XXX/coco/, is recommended).
+
+  The dataset directory structure is as follows:
+
+```
+   coco
+   |-- annotations
+   |-- images
+      |-- train2017
+      |-- val2017
+   |-- labels
+      |-- train2017
+      |-- val2017
+```
+
+# Starting Training
+
+## Train the Model
+
+1. Go to the root directory of the extracted source package.
+
+   ```
+   cd /${model_folder_name}
+   ```
+
+2. Run the training scripts.
+
+   The model supports single-machine single-card training and single-machine 8-card training.
+
+   - Single-machine single-card training
+
+     Start 1-card training.
+
+     ```
+     bash ./test/train_full_1p.sh --data_path=real_data_path         # 1p accuracy
+     bash ./test/train_performance_1p.sh --data_path=real_data_path  # 1p performance
+     ```
+
+   - Single-machine 8-card training
+
+     Start 8-card training.
+
+     ```
+     bash ./test/train_full_8p.sh --data_path=real_data_path         # 8p accuracy
+     bash ./test/train_performance_8p.sh --data_path=real_data_path  # 8p performance
+     ```
+
+   Set --data_path to the real dataset path.
+
+   The training script parameters are described below (a short sketch at the end of this step shows how `--batch-size` feeds the `fps` value in the training log).
+
+   ```
+   Common parameters:
+   --data_path        // dataset path
+   --config-file      // default training configuration file
+   --device_ids       // cards to use for training
+   --batch-size       // training batch size, default: 8
+   AMP                // whether to enable mixed precision
+   OPT_LEVEL          // mixed-precision level, default: O1
+   LOSS_SCALE_VALUE   // mixed-precision loss scale, default: 64
+   ```
+
+   After training completes, the weight files are saved under the current path, and the model's training accuracy and performance are printed.
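+   The `--batch-size` value doubles as the figure used for throughput logging: `tools/train_net.py` exports it through the `batch_size` environment variable, and the metric printer in `detectron2/utils/events.py` reads it back to report `batchsize` and `fps` (see the corresponding hunks later in this patch). A minimal illustrative sketch of that mechanism (the numbers below are placeholders, not measurements):
+
+   ```python
+   import os
+
+   # tools/train_net.py side: expose the global batch size to the logger.
+   os.environ['batch_size'] = str(8)          # e.g. the value passed via --batch-size
+
+   # detectron2/utils/events.py side: derive throughput from the smoothed iteration time.
+   iter_time = 0.25                           # seconds per iteration (placeholder)
+   batch_size = int(os.environ['batch_size'])
+   print("batchsize: {:.0f}  fps: {:.3f}".format(batch_size, batch_size / iter_time))
+   ```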
+
+# Training Results
+
+**Table 2** Training results
+
+| Name   | Accuracy | Performance (FPS) | Torch version |
+| ------ | -------- | ----------------- | ------------- |
+| NPU-1p | -        | 5.58              | 1.5           |
+| NPU-8p | 0.372    | 34.875            | 1.5           |
+| NPU-1p | -        | 9.586             | 1.8           |
+| NPU-8p | 0.375    | 64.0              | 1.8           |
+
+
+# Release Notes
+
+## Changes
+
+2022.11.25: Updated to PyTorch 1.8 and re-released.
+
+2022.03.18: First release.
+
+## Known Issues
+
+None.
diff --git a/PyTorch/contrib/cv/detection/RetinaNet/README_NPU.md b/PyTorch/contrib/cv/detection/RetinaNet/README_NPU.md
deleted file mode 100644
index 04255b92bd..0000000000
--- a/PyTorch/contrib/cv/detection/RetinaNet/README_NPU.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# RetinaNet(Detectron2)
-
-## RetinaNet Detail
-
-As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.
-Therefore, RetinaNet is re-implemented using semantics such as custom OP. For details, see detectron2/modeling/meta_arch/retinanet.py
-
-
-## Requirements
-
-- Install PyTorch ([pytorch.org](http://pytorch.org))
-- Install detectron2
-  - Download RetinaNet from https://gitee.com/ascend/modelzoo.git
-  - Then, cd contrib/PyTorch/Official/cv/image_object_detection/RetinaNet
-  - Then, pip3.7 install -e .
-- Download the ImageNet dataset from http://cocodataset.org/#home
-  - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh)
-- When do the demo, need to download a picture locally and name it input1.jpg
-## Training
-
-Before to train, preparing R-50.pkl and config weight in the config yaml file. 
-To train a model, run `tools/train_net.py` with the desired model architecture and the path to the ImageNet dataset: - -```bash -# 1p train 1p -bash ./test/train_full_1p.sh --data_path=数据集路径 - -# 8p train 8p -bash ./test/train_full_8p.sh --data_path=数据集路径 - -# 8p eval -bash ./test/train_eval_8p.sh --data_path=数据集路径 - -# To ONNX -python3.7.5 pthtar2onnx.py -``` - - - - diff --git a/PyTorch/contrib/cv/detection/RetinaNet/detectron2/engine/defaults.py b/PyTorch/contrib/cv/detection/RetinaNet/detectron2/engine/defaults.py index 0b33ab2e52..0307b4f69a 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/detectron2/engine/defaults.py +++ b/PyTorch/contrib/cv/detection/RetinaNet/detectron2/engine/defaults.py @@ -93,6 +93,7 @@ Run on multiple machines: ) parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") + parser.add_argument("--batch-size", type=int, default=64, help="batch_size of all gpus") parser.add_argument('--device-ids',nargs='+') parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") parser.add_argument( diff --git a/PyTorch/contrib/cv/detection/RetinaNet/detectron2/utils/events.py b/PyTorch/contrib/cv/detection/RetinaNet/detectron2/utils/events.py index a700d42bdc..5c5f008cdd 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/detectron2/utils/events.py +++ b/PyTorch/contrib/cv/detection/RetinaNet/detectron2/utils/events.py @@ -26,7 +26,7 @@ from fvcore.common.history_buffer import HistoryBuffer from detectron2.config import get_cfg _CURRENT_STORAGE_STACK = [] -cfg = get_cfg() + def get_event_storage(): """ @@ -184,11 +184,13 @@ class CommonMetricPrinter(EventWriter): self.logger = logging.getLogger(__name__) self._max_iter = max_iter self._last_write = None + self.cfg = None def write(self): storage = get_event_storage() iteration = storage.iter - + if self.cfg is None: + self.cfg = get_cfg() try: data_time = storage.history("data_time").avg(20) except KeyError: @@ -239,8 +241,8 @@ class CommonMetricPrinter(EventWriter): data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", lr=lr, memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", - batchsize = "batchsize: {:.0f} " .format(cfg.SOLVER.IMS_PER_BATCH), - fps = "fps: {:.3f} ".format((cfg.SOLVER.IMS_PER_BATCH *8) / iter_time) if iter_time is not None else "", + batchsize = "batchsize: {:.0f} " .format(int(os.environ['batch_size'])), + fps = "fps: {:.3f} ".format((int(os.environ['batch_size'])) / iter_time) if iter_time is not None else "", ) ) diff --git a/PyTorch/contrib/cv/detection/RetinaNet/requirements.txt b/PyTorch/contrib/cv/detection/RetinaNet/requirements.txt index 2004eb6ec5..a494133671 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/requirements.txt +++ b/PyTorch/contrib/cv/detection/RetinaNet/requirements.txt @@ -1,4 +1,4 @@ -torchvision==0.2.2.post2 +torchvision fvcore pycocotools cloudpickle diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/env_npu.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/env_npu.sh index f96f48f812..c80981db88 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/env_npu.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/env_npu.sh @@ -1,36 +1,23 @@ #!/bin/bash -export install_path=/usr/local/Ascend +CANN_INSTALL_PATH_CONF='/etc/Ascend/ascend_cann_install.info' -if [ -d ${install_path}/toolkit ]; then - export 
LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp +if [ -f $CANN_INSTALL_PATH_CONF ]; then + CANN_INSTALL_PATH=$(cat $CANN_INSTALL_PATH_CONF | grep Install_Path | cut -d "=" -f 2) else - if [ -d ${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi + CANN_INSTALL_PATH="/usr/local/Ascend" fi +if [ -d ${CANN_INSTALL_PATH}/ascend-toolkit/latest ]; then + source ${CANN_INSTALL_PATH}/ascend-toolkit/set_env.sh +else + source ${CANN_INSTALL_PATH}/nnae/set_env.sh +fi #将Host日志输出到串口,0-关闭/1-开启 export ASCEND_SLOG_PRINT_TO_STDOUT=0 #设置默认日志级别,0-debug/1-info/2-warning/3-error export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 +#设置Host侧Event日志开启标志,0-关闭/1-开启 export 
ASCEND_GLOBAL_EVENT_ENABLE=0 #设置是否开启taskque,0-关闭/1-开启 export TASK_QUEUE_ENABLE=1 @@ -42,8 +29,18 @@ export COMBINED_ENABLE=1 export DYNAMIC_OP="ADD#MUL" #HCCL白名单开关,1-关闭/0-开启 export HCCL_WHITELIST_DISABLE=1 -export HCCL_IF_IP=$(hostname -I |awk '{print $1}') +#设置device侧日志登记为error +msnpureport -g error -d 0 +msnpureport -g error -d 1 +msnpureport -g error -d 2 +msnpureport -g error -d 3 +msnpureport -g error -d 4 +msnpureport -g error -d 5 +msnpureport -g error -d 6 +msnpureport -g error -d 7 +#关闭Device侧Event日志 +msnpureport -e disable path_lib=$(python3.7 -c """ import sys @@ -64,4 +61,4 @@ print(result)""" echo ${path_lib} -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/train_eval_8p.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/train_eval_8p.sh index b1b4e7599f..8a688fd2bf 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/train_eval_8p.sh @@ -70,7 +70,7 @@ python3.7 tools/train_net.py \ --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml \ --eval-only \ AMP 1\ - OPT_LEVEL O2 \ + OPT_LEVEL O1 \ LOSS_SCALE_VALUE 64 \ MODEL.DEVICE npu:0 \ SOLVER.IMS_PER_BATCH 16 \ diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_1p.sh index 006be2a84f..b110b86276 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_1p.sh @@ -82,8 +82,9 @@ if [ x"${etp_flag}" != x"true" ];then fi python3.7 tools/train_net.py \ --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml \ + --batch-size ${batch_size} \ AMP 1 \ - OPT_LEVEL O2 \ + OPT_LEVEL O1 \ MODEL.DEVICE npu:${ASCEND_DEVICE_ID} \ LOSS_SCALE_VALUE 64 \ SOLVER.IMS_PER_BATCH ${batch_size} \ diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_8p.sh index 4ff3836cde..9285109f55 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/train_full_8p.sh @@ -70,12 +70,14 @@ python3.7 -u tools/train_net.py \ --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml \ --device-ids 0 1 2 3 4 5 6 7 \ --num-gpus 8 \ + --batch-size ${batch_size} \ AMP 1\ - OPT_LEVEL O2 \ + OPT_LEVEL O1 \ LOSS_SCALE_VALUE 64 \ SOLVER.IMS_PER_BATCH ${batch_size} \ DATALOADER.NUM_WORKERS ${workers} \ - SOLVER.BASE_LR 0.04 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + SOLVER.BASE_LR 0.04 \ + SOLVER.MAX_ITER ${max_iter} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_1p.sh index 774a2afef4..16c2d44f57 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="RetinaNet" # 训练batch_size -batch_size=16 +batch_size=8 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 @@ -82,13 +82,15 @@ if [ x"${etp_flag}" != x"true" ];then fi python3.7 tools/train_net.py \ --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml \ + --batch-size ${batch_size} \ 
AMP 1 \ - OPT_LEVEL O2 \ + OPT_LEVEL O1 \ MODEL.DEVICE npu:${ASCEND_DEVICE_ID} \ LOSS_SCALE_VALUE 64 \ SOLVER.IMS_PER_BATCH ${batch_size} \ DATALOADER.NUM_WORKERS ${workers} \ SOLVER.BASE_LR 0.01 \ + DATASETS.TEST '()' \ SOLVER.MAX_ITER ${max_iter} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait @@ -107,11 +109,7 @@ FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | # 打印,不需要修改 echo "Final Performance images/sec : $FPS" -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -A 3 "Evaluation results for bbox:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | tail -n 1 | awk '{print $2}'` - # 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" # 性能看护结果汇总 diff --git a/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_8p.sh index f588426261..df967b0b65 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/detection/RetinaNet/test/train_performance_8p.sh @@ -12,7 +12,7 @@ export RANK_SIZE=8 data_path="" # 训练最大iter数 -max_iter=5000 +max_iter=1000 # 加载数据进程数 workers=4 @@ -70,12 +70,15 @@ python3.7 -u tools/train_net.py \ --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml \ --device-ids 0 1 2 3 4 5 6 7 \ --num-gpus 8 \ + --batch-size ${batch_size} \ AMP 1\ - OPT_LEVEL O2 \ + OPT_LEVEL O1 \ LOSS_SCALE_VALUE 64 \ SOLVER.IMS_PER_BATCH ${batch_size} \ DATALOADER.NUM_WORKERS ${workers} \ - SOLVER.BASE_LR 0.04 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + SOLVER.BASE_LR 0.04 \ + DATASETS.TEST '()' \ + SOLVER.MAX_ITER ${max_iter} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait @@ -93,11 +96,7 @@ FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | # 打印,不需要修改 echo "Final Performance images/sec : $FPS" -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -A 3 "Evaluation results for bbox:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | tail -n 1 | awk '{print $2}'` - # 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" # 性能看护结果汇总 diff --git a/PyTorch/contrib/cv/detection/RetinaNet/tools/train_net.py b/PyTorch/contrib/cv/detection/RetinaNet/tools/train_net.py index fe8a0cdf85..080f8bed87 100644 --- a/PyTorch/contrib/cv/detection/RetinaNet/tools/train_net.py +++ b/PyTorch/contrib/cv/detection/RetinaNet/tools/train_net.py @@ -33,6 +33,8 @@ import logging import os from collections import OrderedDict import torch +if torch.__version__ >= '1.8.1': + import torch_npu from apex import amp import detectron2.utils.comm as comm @@ -146,6 +148,7 @@ def setup(args): def main(args): + os.environ['batch_size'] = str(args.batch_size) cfg = setup(args) """ @@ -153,6 +156,11 @@ def main(args): consider writing your own training loop (see plain_train_net.py) or subclassing the trainer. """ + option = {} + option["ACL_OP_COMPILER_CACHE_MODE"] = "enable" # cache功能启用 + option["ACL_OP_COMPILER_CACHE_DIR"] = "./cache" # cache所在文件夹 + print("option:",option) + torch.npu.set_option(option) trainer = Trainer(cfg, args) trainer.resume_or_load(resume=args.resume) if cfg.TEST.AUG.ENABLED: -- Gitee
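
For reference, the PT-1.8 adaptation in `tools/train_net.py` comes down to three steps visible in the hunk above: import `torch_npu` when the PyTorch version is 1.8.1 or later, enable the ACL operator compile cache before building the trainer, and export the global batch size for the metric printer. A minimal standalone sketch of the same pattern (the helper name `setup_npu` is illustrative and not part of the patch; it only runs in an Ascend PyTorch environment where `torch_npu` is available):

```python
import os
import torch

if torch.__version__ >= '1.8.1':  # torch_npu ships as a separate package from PyTorch 1.8 onwards
    import torch_npu              # noqa: F401  (registers the NPU device backend)


def setup_npu(batch_size, cache_dir="./cache"):
    # Expose the global batch size so CommonMetricPrinter can log batchsize/fps.
    os.environ['batch_size'] = str(batch_size)
    # Enable the ACL operator compile cache and point it at cache_dir, as main() does.
    option = {
        "ACL_OP_COMPILER_CACHE_MODE": "enable",
        "ACL_OP_COMPILER_CACHE_DIR": cache_dir,
    }
    torch.npu.set_option(option)


if __name__ == "__main__":
    setup_npu(batch_size=64)
```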