From 1a7c3ea4c090d41ccb14b4f8304724b1df5f4492 Mon Sep 17 00:00:00 2001
From: brjiang <jiangbangrui@huawei.com>
Date: Thu, 20 Jun 2024 09:55:05 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9Eyolov8=E7=9A=84=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95=E6=A0=B7=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/foundation/cv/yolov8/README.md   | 110 ++++++++++
 .../built-in/foundation/cv/yolov8/test.py     | 204 ++++++++++++++++++
 2 files changed, 314 insertions(+)
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py

diff --git a/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
new file mode 100644
index 0000000000..b9423e8ec5
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
@@ -0,0 +1,110 @@
+# yolov8模型-推理指导
+
+- [概述](#概述)
+- [推理环境准备](#推理环境准备)
+- [快速上手](#快速上手)
+  - [获取源码](#获取源码)
+  - [模型推理](#模型推理)
+
+# 概述
+
+该工程使用mindietorch部署yolov8模型
+
+- 参考实现：
+  ```bash
+   https://github.com/ultralytics/ultralytics
+  ```
+
+# 推理环境准备
+
+- 该模型需要以下插件与驱动
+
+  **表 1**  版本配套表
+
+  | 配套   | 版本    | 环境准备指导 |
+  | ------ | ------- | ------------ |
+  | Python | 3.10.13 | -            |
+  | torch  | 2.1.0   | -            |
+
+# 快速上手
+## 获取源码
+
+1. 安装依赖。
+
+   ```bash
+   pip install ultralytics
+   ```
+
+2. 安装mindie包
+
+   ```bash
+   # 安装mindie
+   chmod +x ./Ascend-mindie_xxx.run
+   ./Ascend-mindie_xxx.run --install
+   source /usr/local/Ascend/mindie/set_env.sh
+   ```
+
+## 模型推理
+
+0. 获取权重及数据
+    可提前下载权重并放到代码同级目录下，以避免执行后续步骤时出现下载失败。
+
+    ```bash
+    wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
+    ```
+    
+    准备用于测试的样本图像，放到当前目录下，可使用[yolov8官方测试样本](https://ultralytics.com/images/bus.jpg)
+
+1. 模型编译
+   执行下述命令进行模型编译
+    ```bash
+    python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts
+    ```
+
+    参数说明：
+      - --img_height: 图像高度，需要是32的倍数，模型会将任意大小的输入图像预处理至(img_height, img_width)
+      - --img_width: 图像宽度，需要是32的倍数，模型会将任意大小的输入图像预处理至(img_height, img_width)
+      - --pretrain_path：预训练权重路径
+      - --save_path：使用mindietorch编译后的结果的保存路径
+    
+2. 样本测试
+  执行下述命令可以将任意样本图像的目标检测结果可视化
+    ```bash
+    python test.py --img_path ./bus.jpg --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode sample
+    ```
+
+    参数说明：
+      - --img_path：样本图像路径
+      - --img_height: 图像高度，与模型编译保持一致
+      - --img_width: 图像宽度，与模型编译保持一致
+      - --pretrain_path：预训练权重路径
+      - --save_path：使用mindietorch编译后的结果的保存路径
+      - --mode：指定mode为sample则可以进行样本测试
+
+  原始模型推理样本图像的结果将保存为pretrained_result.jpg，原始模型导出的torchscript格式模型的推理结果将保存为exported_result.jpg，使用mindietorch编译得到模型的推理结果将保存为compiled_result.jpg
+
+3. 精度测试
+  执行下述命令可以使用随机数据测试模型精度
+    ```bash
+    python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode accuracy
+    ```
+
+    参数说明：
+      - --img_height: 图像高度，与模型编译保持一致
+      - --img_width: 图像宽度，与模型编译保持一致
+      - --pretrain_path：预训练权重路径
+      - --save_path：使用mindietorch编译后的结果的保存路径
+      - --mode：指定mode为accuracy则可以进行精度测试
+
+4. 性能测试
+  执行下述命令可以使用随机数据测试模型性能
+    ```bash
+    python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode performance
+    ```
+
+    参数说明：
+      - --img_height: 图像高度，与模型编译保持一致
+      - --img_width: 图像宽度，与模型编译保持一致
+      - --pretrain_path：预训练权重路径
+      - --save_path：使用mindietorch编译后的结果的保存路径
+      - --mode：指定mode为performance则可以进行性能测试
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py
new file mode 100644
index 0000000000..cf7b404902
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py
@@ -0,0 +1,204 @@
+from tqdm import tqdm
+import time
+import argparse
+
+import torch
+import numpy as np
+from PIL import Image
+from ultralytics import YOLO
+from ultralytics.data.augment import LetterBox
+from ultralytics.engine.results import Results
+from ultralytics.utils import ops
+
+import mindietorch
+from torch._export import export, dynamic_dim
+
+
+COSINE_THRESHOLD = 0.999  # accuracy() counts a sample as mismatched when its similarity is below this
+def cosine_similarity(gt_tensor, pred_tensor):  # returns the cosine similarity of the two tensors as a Python float
+    gt_tensor = gt_tensor.flatten().to(torch.float32)  # compare both inputs as flat float32 vectors
+    pred_tensor = pred_tensor.flatten().to(torch.float32)
+    if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0:  # special case when either input sums to zero
+        if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True):
+            return 1.0  # element-wise (near-)equal inputs are reported as a perfect match
+    res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6)
+    res = res.cpu().detach().item()  # convert the resulting tensor to a plain Python number
+    return res
+
+
+def image_preprocess(img_path, img_sz):
+    """Load an image, letterbox it to img_sz and return (scaled input tensor, original image with a batch axis)."""
+
+    img = Image.open(img_path).convert('RGB')
+    img_array = np.array(img)  # presumably an HWC uint8 RGB array as produced by PIL - TODO confirm
+
+    letterbox = LetterBox(img_sz, auto=False, stride=32)
+    img_resized =  letterbox(image=img_array)
+
+    img_resized = img_resized[np.newaxis, :].transpose((0, 3, 1, 2))  # add a leading batch axis, then reorder axes (presumably NHWC -> NCHW)
+    img_resized = np.ascontiguousarray(img_resized)
+
+    img_tensor = torch.from_numpy(img_resized)
+    img_tensor = img_tensor / 255  # divide pixel values by 255
+
+    return img_tensor, img_array[np.newaxis, :, :, ::-1]  # second value: un-resized image, batch axis added, last axis reversed (presumably RGB -> BGR - confirm)
+
+
+def result_postprocess(pred, img, orig_imgs):
+    """Run non-max suppression on a raw prediction and return a list of Results with boxes rescaled to each original image."""
+
+    preds = ops.non_max_suppression([pred, ], 0.25, 0.7, agnostic=False, max_det=300)  # presumably conf=0.25 and IoU=0.7 thresholds - verify against ultralytics
+
+    names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck',
+             8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
+             14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
+             22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase',
+             29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
+             35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
+             40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana',
+             47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza',
+             54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table',
+             61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
+             68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock',
+             75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
+
+    results = []
+    for i, pred in enumerate(preds):  # note: rebinds the `pred` parameter to each per-image detection tensor
+        orig_img = orig_imgs[i]
+        pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)  # rescale the first four columns from the model-input size to the original image size
+        results.append(Results(orig_img, path="", names=names, boxes=pred))
+    return results
+
+
+def accuracy(save_path, default_exported_model_path, img_size, device):
+    """Compare the compiled model with the exported TorchScript model on 100 random inputs and print the verdict."""
+    print('Start accuracy test.')
+
+    exported_model = torch.jit.load(default_exported_model_path)
+    compiled_model = torch.jit.load(save_path)
+
+    compare_res = 0  # number of samples whose similarity fell below COSINE_THRESHOLD
+    shape = (1, 3, img_size[0], img_size[1])  # img_size is indexed as (height, width) by the caller
+
+    for i in tqdm(range(100)):
+        random_input = torch.rand(shape).to(device)
+
+        mindie_res = compiled_model(random_input)
+        torch_res = exported_model(random_input.to("cpu"))  # reference output is computed on the CPU copy of the same input
+
+        res = cosine_similarity(mindie_res.to("cpu"), torch_res)
+        if res < COSINE_THRESHOLD:
+            compare_res += 1
+
+    if compare_res == 0:
+        print("Compare success! Compiled model has the same output with origin torch model!")
+    else:
+        print("Compare failed! {} samples are not equal with origin torch model!".format(compare_res))
+
+
+def performance(save_path, img_size, device):
+    """Benchmark the compiled model on a random input and print the average latency (ms) and fps."""
+    compiled_model = torch.jit.load(save_path)
+
+    stream = mindietorch.npu.Stream(device)
+    # warm up: 20 untimed runs before measuring
+    num_warmup = 20
+    shape = (1, 3, img_size[0], img_size[1])
+    random_input = torch.rand(shape).to(device)
+    for _ in range(num_warmup):
+        with mindietorch.npu.stream(stream):
+            compiled_model(random_input)
+            stream.synchronize()
+    print('warmup done')
+
+    # performance test: 1000 timed runs on one fixed random input
+    print('Start performance test.')
+    total_time = 0
+    total_num = 1000
+
+    random_input = torch.rand(shape).to(device)
+    for _ in range(total_num):
+        with mindietorch.npu.stream(stream):
+            infer_start = time.perf_counter()  # monotonic high-resolution clock; time.time() can jump with system clock changes
+            compiled_model(random_input)
+            stream.synchronize()  # synchronize the stream before reading the end timestamp
+            infer_end = time.perf_counter()
+            total_time += infer_end - infer_start
+
+    average_time = total_time / total_num
+    fps = 1 / average_time
+    print("Average time: ", average_time * 1000, "ms")
+    print("fps:", fps)
+
+
+def sample(pretrain_path, default_exported_model_path, save_path, img_path, img_size):
+    """Run one image through the pretrained, exported and compiled models and save each annotated result as a JPEG."""
+
+    print("Preprocess input image.")
+    input_img, origin_img = image_preprocess(img_path, img_size)
+
+    print("Begin to load pretrained model, exported model and compiled model.")
+    pretrained_model = YOLO(pretrain_path)
+    exported_model = torch.jit.load(default_exported_model_path)
+    compiled_model = torch.jit.load(save_path)
+
+    print("Predict with pretrained model.")
+    results = pretrained_model(img_path)  # the YOLO object is given the image path, not the preprocessed tensor
+    for result in results:
+        result.save(filename="./pretrained_result.jpg")
+    
+    print("Predict with exported torchscript model.")
+    exported_result = exported_model(input_img)
+    exported_result = result_postprocess(exported_result, input_img, origin_img)
+    for result in exported_result:
+        result.save(filename="./exported_result.jpg")
+
+    print("Predict with compiled model.")
+    compiled_result = compiled_model(input_img.to("npu"))  # compiled model is fed the input moved to the "npu" device
+    compiled_result = result_postprocess(compiled_result.to("cpu"), input_img, origin_img)  # output is moved back to the CPU for post-processing
+    for result in compiled_result:
+        result.save(filename="./compiled_result.jpg")
+
+
+def export(input_img_size, pretrain_path, save_path, default_exported_model_path):  # NOTE(review): shadows the `export` imported from torch._export at the top of the file
+    """Export the pretrained model to TorchScript, compile it with mindietorch for one fixed input shape, and save it to save_path."""
+
+    print("Load pretrained model.")
+    model = YOLO(pretrain_path)
+    
+    print("Export pretrained model to torchscript, which is by default saved as yolov8n.torchscript.")
+    model.export(format="torchscript", imgsz=input_img_size)
+    exported_model = torch.jit.load(default_exported_model_path)  # assumes model.export() wrote its file to this path - TODO confirm for other weight names
+
+    print("Begin compile model with mindietorch.")
+    compile_inputs = [mindietorch.Input(min_shape = (1, 3, input_img_size[0], input_img_size[1]),  # min_shape equals max_shape: a single static shape
+                                        max_shape = (1, 3, input_img_size[0], input_img_size[1]),
+                                        dtype = torch.float32)]
+    compiled_model = mindietorch.compile(exported_model, ir="ts", inputs=compile_inputs,
+                                         precision_policy=mindietorch.PrecisionPolicy.FP16)
+    compiled_model.save(save_path)
+
+    print("Finish compiling yolov8 with mindietorch!")
+
+
+if __name__ == "__main__":  # command-line entry point: parse options, then run the selected mode
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img_path", default="./bus.jpg", help="path of sample img")
+    parser.add_argument("--img_height", type=int, default=1088, help="the desired input image height")
+    parser.add_argument("--img_width", type=int, default=1920, help="the desired input image width")
+    parser.add_argument("--mode", default="export", choices=["export", "accuracy", "performance", "sample"])
+    parser.add_argument("--pretrain_path", default="yolov8n.pt", help="pretrain model path")
+    parser.add_argument("--save_path", default="./compiled_model.ts", help="save path of compiled model")
+    args = parser.parse_args()
+
+    img_size = (args.img_height, args.img_width)  # (height, width); type=int keeps command-line values as ints, not strings
+    default_exported_model_path = "./yolov8n.torchscript"  # NOTE(review): presumably only valid when the weights file is yolov8n.pt - confirm
+
+    if args.mode == "export":
+        export(img_size, args.pretrain_path, args.save_path, default_exported_model_path)
+    elif args.mode == "accuracy":
+        accuracy(args.save_path, default_exported_model_path, img_size, "npu:0")
+    elif args.mode == "performance":
+        performance(args.save_path, img_size, "npu:0")
+    elif args.mode == "sample":
+        sample(args.pretrain_path, default_exported_model_path, args.save_path, args.img_path, img_size)
\ No newline at end of file
-- 
Gitee