From 1a7c3ea4c090d41ccb14b4f8304724b1df5f4492 Mon Sep 17 00:00:00 2001
From: brjiang
Date: Thu, 20 Jun 2024 09:55:05 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9Eyolov8=E7=9A=84=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95=E6=A0=B7=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/foundation/cv/yolov8/README.md   | 110 ++++++++++
 .../built-in/foundation/cv/yolov8/test.py     | 210 ++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py

diff --git a/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
new file mode 100644
index 0000000000..b9423e8ec5
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/README.md
@@ -0,0 +1,110 @@
+# yolov8模型-推理指导
+
+- [概述](#概述)
+- [推理环境准备](#推理环境准备)
+- [快速上手](#快速上手)
+  - [获取源码](#获取源码)
+  - [模型推理](#模型推理)
+
+# 概述
+
+该工程使用mindietorch部署yolov8模型
+
+- 参考实现:
+  ```bash
+  https://github.com/ultralytics/ultralytics
+  ```
+
+# 推理环境准备
+
+- 该模型需要以下插件与驱动
+
+  **表 1** 版本配套表
+
+  | 配套 | 版本 | 环境准备指导 |
+  | ------ | ------- | ------------ |
+  | Python | 3.10.13 | - |
+  | torch | 2.1.0 | - |
+
+# 快速上手
+## 获取源码
+
+1. 安装依赖。
+
+   ```bash
+   pip install ultralytics
+   ```
+
+2. 安装mindie包
+
+   ```bash
+   # 安装mindie
+   chmod +x ./Ascend-mindie_xxx.run
+   ./Ascend-mindie_xxx.run --install
+   source /usr/local/Ascend/mindie/set_env.sh
+   ```
+
+## 模型推理
+
+0. 获取权重及数据
+   可提前下载权重,放到代码同级目录下,以避免执行后面步骤时可能会出现下载失败。
+
+   ```bash
+   wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
+   ```
+
+   准备用于测试的样本图像,放到当前目录下,可使用[yolov8官方测试样本](https://ultralytics.com/images/bus.jpg)
+
+1. 模型编译
+   执行下述命令进行模型编译
+   ```bash
+   python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts
+   ```
+
+   参数说明:
+   - --img_height: 图像高度,需要是32的倍数,模型会将任意大小的输入图像预处理至(img_height, img_width)
+   - --img_width: 图像宽度,需要是32的倍数,模型会将任意大小的输入图像预处理至(img_height, img_width)
+   - --pretrain_path:预训练权重路径
+   - --save_path:使用mindietorch编译后的结果的保存路径
+
+2. 样本测试
+   执行下述命令可以将任意样本图像的目标检测结果可视化
+   ```bash
+   python test.py --img_path ./bus.jpg --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode sample
+   ```
+
+   参数说明:
+   - --img_path:样本图像路径
+   - --img_height: 图像高度,与模型编译保持一致
+   - --img_width: 图像宽度,与模型编译保持一致
+   - --pretrain_path:预训练权重路径
+   - --save_path:使用mindietorch编译后的结果的保存路径
+   - --mode:指定mode为sample则可以进行样本测试
+
+   原始模型推理样本图像的结果将保存为pretrained_result.jpg,原始模型导出的torchscript格式模型的推理结果将保存为exported_result.jpg,使用mindietorch编译得到模型的推理结果将保存为compiled_result.jpg
+
+3. 精度测试
+   执行下述命令可以使用随机数据测试模型精度
+   ```bash
+   python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode accuracy
+   ```
+
+   参数说明:
+   - --img_height: 图像高度,与模型编译保持一致
+   - --img_width: 图像宽度,与模型编译保持一致
+   - --pretrain_path:预训练权重路径
+   - --save_path:使用mindietorch编译后的结果的保存路径
+   - --mode:指定mode为accuracy则可以进行精度测试
+
+4. 性能测试
+   执行下述命令可以使用随机数据测试模型性能
+   ```bash
+   python test.py --img_height 1088 --img_width 1920 --pretrain_path ./yolov8n.pt --save_path ./compiled_model.ts --mode performance
+   ```
+
+   参数说明:
+   - --img_height: 图像高度,与模型编译保持一致
+   - --img_width: 图像宽度,与模型编译保持一致
+   - --pretrain_path:预训练权重路径
+   - --save_path:使用mindietorch编译后的结果的保存路径
+   - --mode:指定mode为performance则可以进行性能测试
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py
new file mode 100644
index 0000000000..cf7b404902
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/cv/yolov8/test.py
@@ -0,0 +1,210 @@
+from tqdm import tqdm
+import time
+import argparse
+
+import torch
+import numpy as np
+from PIL import Image
+from ultralytics import YOLO
+from ultralytics.data.augment import LetterBox
+from ultralytics.engine.results import Results
+from ultralytics.utils import ops
+
+import mindietorch
+from torch._export import export, dynamic_dim
+
+
+# Outputs whose cosine similarity falls below this value are counted as mismatches.
+COSINE_THRESHOLD = 0.999
+
+
+def cosine_similarity(gt_tensor, pred_tensor):
+    """Return the cosine similarity of two tensors after flattening them to float32."""
+    gt_tensor = gt_tensor.flatten().to(torch.float32)
+    pred_tensor = pred_tensor.flatten().to(torch.float32)
+    # Shortcut: if either tensor sums to zero and both are element-wise close, report 1.0.
+    if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0:
+        if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True):
+            return 1.0
+    res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6)
+    res = res.cpu().detach().item()
+    return res
+
+
+def image_preprocess(img_path, img_sz):
+    """Pre-process input image to torch.Tensor with desired image size"""
+
+    img = Image.open(img_path).convert('RGB')
+    img_array = np.array(img)
+
+    letterbox = LetterBox(img_sz, auto=False, stride=32)
+    img_resized = letterbox(image=img_array)
+
+    img_resized = img_resized[np.newaxis, :].transpose((0, 3, 1, 2))
+    img_resized = np.ascontiguousarray(img_resized)
+
+    img_tensor = torch.from_numpy(img_resized)
+    img_tensor = img_tensor / 255
+
+    return img_tensor, img_array[np.newaxis, :, :, ::-1]
+
+
+def result_postprocess(pred, img, orig_imgs):
+    """Post-processes predictions and returns a list of Results objects."""
+
+    preds = ops.non_max_suppression([pred, ], 0.25, 0.7, agnostic=False, max_det=300)
+
+    names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck',
+             8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
+             14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
+             22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase',
+             29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
+             35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
+             40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana',
+             47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza',
+             54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table',
+             61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
+             68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock',
+             75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
+
+    results = []
+    for i, pred in enumerate(preds):
+        orig_img = orig_imgs[i]
+        pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+        results.append(Results(orig_img, path="", names=names, boxes=pred))
+    return results
+
+
+def accuracy(save_path, default_exported_model_path, img_size, device):
+    """This function is used to test accuracy with random input."""
+    print('Start accuracy test.')
+
+    exported_model = torch.jit.load(default_exported_model_path)
+    compiled_model = torch.jit.load(save_path)
+
+    compare_res = 0
+    shape = (1, 3, img_size[0], img_size[1])
+
+    for _ in tqdm(range(100)):
+        random_input = torch.rand(shape).to(device)
+
+        mindie_res = compiled_model(random_input)
+        torch_res = exported_model(random_input.to("cpu"))
+
+        res = cosine_similarity(mindie_res.to("cpu"), torch_res)
+        if res < COSINE_THRESHOLD:
+            compare_res += 1
+
+    if compare_res == 0:
+        print("Compare success! Compiled model has the same output with origin torch model!")
+    else:
+        print("Compare failed! {} samples are not equal with origin torch model!".format(compare_res))
+
+
+def performance(save_path, img_size, device):
+    """This function is used to test performance with random input."""
+    compiled_model = torch.jit.load(save_path)
+
+    stream = mindietorch.npu.Stream(device)
+    # warm up
+    num_warmup = 20
+    shape = (1, 3, img_size[0], img_size[1])
+    random_input = torch.rand(shape).to(device)
+    for _ in range(num_warmup):
+        with mindietorch.npu.stream(stream):
+            compiled_model(random_input)
+            stream.synchronize()
+    print('warmup done')
+
+    # performance test
+    print('Start performance test.')
+    total_time = 0
+    total_num = 1000
+
+    random_input = torch.rand(shape).to(device)
+    for _ in range(total_num):
+        with mindietorch.npu.stream(stream):
+            infer_start = time.perf_counter()
+            compiled_model(random_input)
+            stream.synchronize()
+            infer_end = time.perf_counter()
+            total_time += infer_end - infer_start
+
+    average_time = total_time / total_num
+    fps = 1 / average_time
+    print("Average time: ", average_time * 1000, "ms")
+    print("fps:", fps)
+
+
+def sample(pretrain_path, default_exported_model_path, save_path, img_path, img_size):
+    """This function is to visualize detection result of sample image."""
+
+    print("Preprocess input image.")
+    input_img, origin_img = image_preprocess(img_path, img_size)
+
+    print("Begin to load pretrained model, exported model and compiled model.")
+    pretrained_model = YOLO(pretrain_path)
+    exported_model = torch.jit.load(default_exported_model_path)
+    compiled_model = torch.jit.load(save_path)
+
+    print("Predict with pretrained model.")
+    results = pretrained_model(img_path)
+    for result in results:
+        result.save(filename="./pretrained_result.jpg")
+
+    print("Predict with exported torchscript model.")
+    exported_result = exported_model(input_img)
+    exported_result = result_postprocess(exported_result, input_img, origin_img)
+    for result in exported_result:
+        result.save(filename="./exported_result.jpg")
+
+    print("Predict with compiled model.")
+    compiled_result = compiled_model(input_img.to("npu"))
+    compiled_result = result_postprocess(compiled_result.to("cpu"), input_img, origin_img)
+    for result in compiled_result:
+        result.save(filename="./compiled_result.jpg")
+
+
+def export(input_img_size, pretrain_path, save_path, default_exported_model_path):
+    """This function is used to export the TorchScript model, compile it using MindieTorch, and save it."""
+
+    print("Load pretrained model.")
+    model = YOLO(pretrain_path)
+
+    print("Export pretrained model to torchscript, which is by default saved as yolov8n.torchscript.")
+    model.export(format="torchscript", imgsz=input_img_size)
+    exported_model = torch.jit.load(default_exported_model_path)
+
+    print("Begin compile model with mindietorch.")
+    compile_inputs = [mindietorch.Input(min_shape=(1, 3, input_img_size[0], input_img_size[1]),
+                                        max_shape=(1, 3, input_img_size[0], input_img_size[1]),
+                                        dtype=torch.float32)]
+    compiled_model = mindietorch.compile(exported_model, ir="ts", inputs=compile_inputs,
+                                         precision_policy=mindietorch.PrecisionPolicy.FP16)
+    compiled_model.save(save_path)
+
+    print("Finish compiling yolov8 with mindietorch!")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img_path", default="./bus.jpg", help="path of sample img")
+    # type=int: without it, sizes given on the command line are parsed as str and break shape construction.
+    parser.add_argument("--img_height", type=int, default=1088, help="the desired input image height")
+    parser.add_argument("--img_width", type=int, default=1920, help="the desired input image width")
+    parser.add_argument("--mode", default="export", choices=["export", "accuracy", "performance", "sample"])
+    parser.add_argument("--pretrain_path", default="yolov8n.pt", help="pretrain model path")
+    parser.add_argument("--save_path", default="./compiled_model.ts", help="save path of compiled model")
+    args = parser.parse_args()
+
+    img_size = (args.img_height, args.img_width)
+    default_exported_model_path = "./yolov8n.torchscript"
+
+    if args.mode == "export":
+        export(img_size, args.pretrain_path, args.save_path, default_exported_model_path)
+    elif args.mode == "accuracy":
+        accuracy(args.save_path, default_exported_model_path, img_size, "npu:0")
+    elif args.mode == "performance":
+        performance(args.save_path, img_size, "npu:0")
+    elif args.mode == "sample":
+        sample(args.pretrain_path, default_exported_model_path, args.save_path, args.img_path, img_size)
\ No newline at end of file
-- 
Gitee