From 4d789a4181849490abbcafc9d54229c78d96fc13 Mon Sep 17 00:00:00 2001
From: commc
Date: Wed, 4 Sep 2024 15:25:08 +0800
Subject: [PATCH 1/4] Customer requirement: CLIP model performance and precision tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/multimodal/perf_test_aie.py      |  99 +++++++++++++
 .../built-in/multimodal/perf_test_onnx.py     |  97 +++++++++++++
 .../built-in/multimodal/precision_test.py     | 134 ++++++++++++++++++
 3 files changed, 330 insertions(+)
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/precision_test.py

diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
new file mode 100644
index 0000000000..f268c4680d
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
@@ -0,0 +1,99 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import time
+import torch
+import mindietorch
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test(inputs, model, stream, meta=""):
+    # warmup
+    for _ in range(10):
+        with mindietorch.npu.stream(stream):
+            model(*inputs)
+        stream.synchronize()
+
+    # performance test
+    num_infer = 100
+    start = time.time()
+    for _ in range(num_infer):
+        with mindietorch.npu.stream(stream):
+            model(*inputs)
+        stream.synchronize()
+    end = time.time()
+
+    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
+    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
+
+
+def test_clip(args):
+    device = f'npu:{args.device_id}'
+    stream = mindietorch.npu.Stream(device)
+    if args.clip_aie_path.endswith(".ts"):
+        model = torch.jit.load(args.clip_aie_path)
+    else:
+        model = torch.load(args.clip_aie_path)
+    model.eval().to(device)
+
+    hf_config_path = os.path.join(args.hf_model_path, "config.json")
+    if not os.path.exists(hf_config_path):
+        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
+    with open(hf_config_path, "r") as f:
+        config_dict = json.load(f)
+
+    image_width = config_dict["vision_config"]["image_size"]
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
+    inputs = [input_ids, input_img, attention_mask]
+
+    test(inputs, model, stream, "CLIP")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--device-id", type=int, help="NPU device id", default=0)
+    parser.add_argument(
+        "--clip-aie-path",
+        type=str,
+        default="/Path/to/compiled/aie_or_ts_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+
+    return parser.parse_args()
+
+
+def main():
+    perf_args = parse_args()
+    mindietorch.set_device(perf_args.device_id)
+    test_clip(perf_args)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
new file mode 100644
index 0000000000..106b7b87d2
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
@@ -0,0 +1,97 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import time
+import torch
+import onnxruntime as ort
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test(encoder_path, provider, output_names, onnx_inputs, meta=""):
+    onnx_model = ort.InferenceSession(
+        encoder_path,
+        providers=[provider]
+    )
+
+    # warmup
+    for _ in range(10):
+        onnx_model.run(output_names, onnx_inputs)
+    # performance test
+    num_infer = 100
+    start = time.time()
+    for _ in range(num_infer):
+        onnx_model.run(output_names, onnx_inputs)
+    end = time.time()
+
+    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
+    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
+
+
+def test_clip(args, provider):
+    hf_config_path = os.path.join(args.hf_model_path, "config.json")
+    if not os.path.exists(hf_config_path):
+        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
+    with open(hf_config_path, "r") as f:
+        config_dict = json.load(f)
+
+    image_width = config_dict["vision_config"]["image_size"]
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).detach().numpy()
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).detach().numpy()
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).detach().numpy()
+
+    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
+    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
+
+    test(args.onnx_path, provider, output_names, onnx_inputs, "CLIP")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--onnx-path",
+        type=str,
+        default="/Path/to/onnx_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+    parser.add_argument("--use-gpu", action="store_true")
+
+    return parser.parse_args()
+
+
+def main():
+    perf_args = parse_args()
+    if perf_args.use_gpu:
+        provider = "CUDAExecutionProvider"
+    else:
+        provider = "CPUExecutionProvider"
+
+    test_clip(perf_args, provider)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py b/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
new file mode 100644
index 0000000000..8a46d0e965
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
@@ -0,0 +1,134 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import torch
+import mindietorch
+import torch
+import onnxruntime as ort
+import numpy as np
+import torch.nn.functional as F
+
+logging.basicConfig(level=logging.INFO)
+
+
+def compare_onnx_aie_output(onnx_out, aie_out, sim_threshold=0.99):
+    num_sim = 0
+    for i, (a, b) in enumerate(zip(onnx_out, aie_out)):
+        a = a.reshape(1, -1).astype(np.float32)
+        b = b.reshape(1, -1)
+        sim = F.cosine_similarity(torch.from_numpy(a), b, dim=1)
+        if sim > sim_threshold:
+            num_sim += 1
+        else:
+            logging.info('Output %d similarity: %f', i, sim)
+
+    logging.info('Number of outputs to compare: %d', len(onnx_out))
+    logging.info('Number of outputs with cosine similarity > %.2f: %d', sim_threshold, num_sim)
+
+
+def compare(args):
+    # MindIETorch
+    device = f'npu:{args.device_id}'
+    stream = mindietorch.npu.Stream(device)
+
+    if args.clip_aie_path.endswith(".ts"):
+        aie_model = torch.jit.load(args.clip_aie_path)
+    else:
+        aie_model = torch.load(args.clip_aie_path)
+    aie_model.eval().to(device)
+
+    hf_config_path = os.path.join(args.hf_model_path, "config.json")
+    if not os.path.exists(hf_config_path):
+        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
+    with open(hf_config_path, "r") as f:
+        config_dict = json.load(f)
+
+    image_width = config_dict["vision_config"]["image_size"]
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
+    inputs = [input_ids, input_img, attention_mask]
+
+    with mindietorch.npu.stream(stream):
+        aie_out = aie_model(*inputs)
+    stream.synchronize()
+
+    if isinstance(aie_out, tuple) or isinstance(aie_out, list):
+        aie_out = (x.cpu() for x in aie_out)
+    else:
+        aie_out = aie_out.cpu()
+
+    # ONNX
+    input_img = input_img.cpu().detach().numpy()
+    input_ids = input_ids.cpu().detach().numpy()
+    attention_mask = attention_mask.cpu().detach().numpy()
+
+    if args.use_gpu:
+        provider = "CUDAExecutionProvider"
+    else:
+        provider = "CPUExecutionProvider"
+
+    onnx_model = ort.InferenceSession(
+        args.clip_onnx_path,
+        providers=[provider]
+    )
+    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
+    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
+    onnx_out = onnx_model.run(output_names, onnx_inputs)
+
+    compare_onnx_aie_output(onnx_out, aie_out, args.sim_threshold)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--device-id", type=int, default=0, help="NPU device id")
+    parser.add_argument(
+        "--clip-aie-path",
+        type=str,
+        default="/Path/to/compiled/aie_or_ts_model"
+    )
+    parser.add_argument(
+        "--clip-onnx-path",
+        type=str,
+        default="/Path/to/onnx_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+    parser.add_argument('--sim-threshold', type=float, default=0.99)
+    parser.add_argument("--use-gpu", action="store_true")
+
+    return parser.parse_args()
+
+
+def main():
+    compare_args = parse_args()
+    mindietorch.set_device(compare_args.device_id)
+    logging.info('=== Compare the outputs of ONNX and AIE ===')
+    compare(compare_args)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
-- 
Gitee

From 0729d7bb1273898d78db95d7d0d525560b391cf1 Mon Sep 17 00:00:00 2001
From: commc
Date: Wed, 4 Sep 2024 18:36:33 +0800
Subject: [PATCH 2/4] Change how the config file is loaded
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../MindIE-Torch/built-in/multimodal/perf_test_aie.py  | 10 +++-------
 .../MindIE-Torch/built-in/multimodal/perf_test_onnx.py |  9 +++------
 .../MindIE-Torch/built-in/multimodal/precision_test.py | 10 +++-------
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
index f268c4680d..ceff8b8648 100644
--- a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
+++ b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
@@ -18,6 +18,7 @@ import argparse
 import time
 import torch
 import mindietorch
+from transformers import AutoConfig
 
 logging.basicConfig(level=logging.INFO)
 
@@ -50,14 +51,9 @@ def test_clip(args):
     else:
         model = torch.load(args.clip_aie_path)
     model.eval().to(device)
+    config = AutoConfig.from_pretrained(args.hf_model_path)
 
-    hf_config_path = os.path.join(args.hf_model_path, "config.json")
-    if not os.path.exists(hf_config_path):
-        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
-    with open(hf_config_path, "r") as f:
-        config_dict = json.load(f)
-
-    image_width = config_dict["vision_config"]["image_size"]
+    image_width = config.vision_config.image_size
     img_input_shape = (args.image_batchsize, 3, image_width, image_width)
     text_input_shape = (args.text_batchsize, args.token_len)
     input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
index 106b7b87d2..3dbc7f9d67 100644
--- a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
+++ b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
@@ -18,6 +18,7 @@ import argparse
 import time
 import torch
 import onnxruntime as ort
+from transformers import AutoConfig
 
 logging.basicConfig(level=logging.INFO)
 
@@ -43,13 +44,9 @@
 
 
 def test_clip(args, provider):
-    hf_config_path = os.path.join(args.hf_model_path, "config.json")
-    if not os.path.exists(hf_config_path):
-        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
-    with open(hf_config_path, "r") as f:
-        config_dict = json.load(f)
+    config = AutoConfig.from_pretrained(args.hf_model_path)
 
-    image_width = config_dict["vision_config"]["image_size"]
+    image_width = config.vision_config.image_size
     img_input_shape = (args.image_batchsize, 3, image_width, image_width)
     text_input_shape = (args.text_batchsize, args.token_len)
     input_img = torch.randn(img_input_shape, dtype=torch.float32).detach().numpy()
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py b/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
index 8a46d0e965..6995367ed9 100644
--- a/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
+++ b/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
@@ -21,6 +21,7 @@ import torch
 import onnxruntime as ort
 import numpy as np
 import torch.nn.functional as F
+from transformers import AutoConfig
 
 logging.basicConfig(level=logging.INFO)
 
@@ -50,14 +51,9 @@ def compare(args):
     else:
         aie_model = torch.load(args.clip_aie_path)
     aie_model.eval().to(device)
-
-    hf_config_path = os.path.join(args.hf_model_path, "config.json")
-    if not os.path.exists(hf_config_path):
-        raise FileNotFoundError(f"config.json not found at {args.hf_model_path}: {hf_config_path}")
-    with open(hf_config_path, "r") as f:
-        config_dict = json.load(f)
+    config = AutoConfig.from_pretrained(args.hf_model_path)
 
-    image_width = config_dict["vision_config"]["image_size"]
+    image_width = config.vision_config.image_size
     img_input_shape = (args.image_batchsize, 3, image_width, image_width)
     text_input_shape = (args.text_batchsize, args.token_len)
     input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
-- 
Gitee

From 2ee22f5ed1d87c290541118e3d22e953e9e4450d Mon Sep 17 00:00:00 2001
From: commc
Date: Thu, 5 Sep 2024 17:08:09 +0800
Subject: [PATCH 3/4] Rework to the correct file hierarchy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/multimodal/CLIP/perf_test_aie.py  |  95 +++++++++++++
 .../multimodal/CLIP/perf_test_onnx.py          |  94 +++++++++++++
 .../multimodal/CLIP/precision_test.py          | 130 ++++++++++++++++++
 3 files changed, 319 insertions(+)
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_aie.py
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_onnx.py
 create mode 100644 MindIE/MindIE-Torch/built-in/multimodal/CLIP/precision_test.py

diff --git a/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_aie.py b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_aie.py
new file mode 100644
index 0000000000..ceff8b8648
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_aie.py
@@ -0,0 +1,95 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import time
+import torch
+import mindietorch
+from transformers import AutoConfig
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test(inputs, model, stream, meta=""):
+    # warmup
+    for _ in range(10):
+        with mindietorch.npu.stream(stream):
+            model(*inputs)
+        stream.synchronize()
+
+    # performance test
+    num_infer = 100
+    start = time.time()
+    for _ in range(num_infer):
+        with mindietorch.npu.stream(stream):
+            model(*inputs)
+        stream.synchronize()
+    end = time.time()
+
+    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
+    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
+
+
+def test_clip(args):
+    device = f'npu:{args.device_id}'
+    stream = mindietorch.npu.Stream(device)
+    if args.clip_aie_path.endswith(".ts"):
+        model = torch.jit.load(args.clip_aie_path)
+    else:
+        model = torch.load(args.clip_aie_path)
+    model.eval().to(device)
+    config = AutoConfig.from_pretrained(args.hf_model_path)
+
+    image_width = config.vision_config.image_size
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
+    inputs = [input_ids, input_img, attention_mask]
+
+    test(inputs, model, stream, "CLIP")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--device-id", type=int, help="NPU device id", default=0)
+    parser.add_argument(
+        "--clip-aie-path",
+        type=str,
+        default="/Path/to/compiled/aie_or_ts_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+
+    return parser.parse_args()
+
+
+def main():
+    perf_args = parse_args()
+    mindietorch.set_device(perf_args.device_id)
+    test_clip(perf_args)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_onnx.py b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_onnx.py
new file mode 100644
index 0000000000..3dbc7f9d67
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/perf_test_onnx.py
@@ -0,0 +1,94 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import time
+import torch
+import onnxruntime as ort
+from transformers import AutoConfig
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test(encoder_path, provider, output_names, onnx_inputs, meta=""):
+    onnx_model = ort.InferenceSession(
+        encoder_path,
+        providers=[provider]
+    )
+
+    # warmup
+    for _ in range(10):
+        onnx_model.run(output_names, onnx_inputs)
+    # performance test
+    num_infer = 100
+    start = time.time()
+    for _ in range(num_infer):
+        onnx_model.run(output_names, onnx_inputs)
+    end = time.time()
+
+    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
+    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
+
+
+def test_clip(args, provider):
+    config = AutoConfig.from_pretrained(args.hf_model_path)
+
+    image_width = config.vision_config.image_size
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).detach().numpy()
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).detach().numpy()
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).detach().numpy()
+
+    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
+    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
+
+    test(args.onnx_path, provider, output_names, onnx_inputs, "CLIP")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--onnx-path",
+        type=str,
+        default="/Path/to/onnx_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+    parser.add_argument("--use-gpu", action="store_true")
+
+    return parser.parse_args()
+
+
+def main():
+    perf_args = parse_args()
+    if perf_args.use_gpu:
+        provider = "CUDAExecutionProvider"
+    else:
+        provider = "CPUExecutionProvider"
+
+    test_clip(perf_args, provider)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/CLIP/precision_test.py b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/precision_test.py
new file mode 100644
index 0000000000..6995367ed9
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/multimodal/CLIP/precision_test.py
@@ -0,0 +1,130 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import logging
+import argparse
+import torch
+import mindietorch
+import torch
+import onnxruntime as ort
+import numpy as np
+import torch.nn.functional as F
+from transformers import AutoConfig
+
+logging.basicConfig(level=logging.INFO)
+
+
+def compare_onnx_aie_output(onnx_out, aie_out, sim_threshold=0.99):
+    num_sim = 0
+    for i, (a, b) in enumerate(zip(onnx_out, aie_out)):
+        a = a.reshape(1, -1).astype(np.float32)
+        b = b.reshape(1, -1)
+        sim = F.cosine_similarity(torch.from_numpy(a), b, dim=1)
+        if sim > sim_threshold:
+            num_sim += 1
+        else:
+            logging.info('Output %d similarity: %f', i, sim)
+
+    logging.info('Number of outputs to compare: %d', len(onnx_out))
+    logging.info('Number of outputs with cosine similarity > %.2f: %d', sim_threshold, num_sim)
+
+
+def compare(args):
+    # MindIETorch
+    device = f'npu:{args.device_id}'
+    stream = mindietorch.npu.Stream(device)
+
+    if args.clip_aie_path.endswith(".ts"):
+        aie_model = torch.jit.load(args.clip_aie_path)
+    else:
+        aie_model = torch.load(args.clip_aie_path)
+    aie_model.eval().to(device)
+    config = AutoConfig.from_pretrained(args.hf_model_path)
+
+    image_width = config.vision_config.image_size
+    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
+    text_input_shape = (args.text_batchsize, args.token_len)
+    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
+    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
+    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
+    inputs = [input_ids, input_img, attention_mask]
+
+    with mindietorch.npu.stream(stream):
+        aie_out = aie_model(*inputs)
+    stream.synchronize()
+
+    if isinstance(aie_out, tuple) or isinstance(aie_out, list):
+        aie_out = (x.cpu() for x in aie_out)
+    else:
+        aie_out = aie_out.cpu()
+
+    # ONNX
+    input_img = input_img.cpu().detach().numpy()
+    input_ids = input_ids.cpu().detach().numpy()
+    attention_mask = attention_mask.cpu().detach().numpy()
+
+    if args.use_gpu:
+        provider = "CUDAExecutionProvider"
+    else:
+        provider = "CPUExecutionProvider"
+
+    onnx_model = ort.InferenceSession(
+        args.clip_onnx_path,
+        providers=[provider]
+    )
+    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
+    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
+    onnx_out = onnx_model.run(output_names, onnx_inputs)
+
+    compare_onnx_aie_output(onnx_out, aie_out, args.sim_threshold)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--device-id", type=int, default=0, help="NPU device id")
+    parser.add_argument(
+        "--clip-aie-path",
+        type=str,
+        default="/Path/to/compiled/aie_or_ts_model"
+    )
+    parser.add_argument(
+        "--clip-onnx-path",
+        type=str,
+        default="/Path/to/onnx_model"
+    )
+    parser.add_argument(
+        "--hf-model-path",
+        default="/Path/to/Huggingface_model_path",
+        type=str,
+        help="Huggingface CLIP Model Path."
+    )
+    parser.add_argument("--text-batchsize", type=int, default=80)
+    parser.add_argument("--image-batchsize", type=int, default=1)
+    parser.add_argument("--token-len", type=int, default=52)
+    parser.add_argument('--sim-threshold', type=float, default=0.99)
+    parser.add_argument("--use-gpu", action="store_true")
+
+    return parser.parse_args()
+
+
+def main():
+    compare_args = parse_args()
+    mindietorch.set_device(compare_args.device_id)
+    logging.info('=== Compare the outputs of ONNX and AIE ===')
+    compare(compare_args)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
-- 
Gitee

From b6d075ea15848d2f9cc163d00a30b01049571d57 Mon Sep 17 00:00:00 2001
From: commc
Date: Thu, 5 Sep 2024 17:09:53 +0800
Subject: [PATCH 4/4] Remove files from the incorrect directory structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/multimodal/perf_test_aie.py      |  95 -------------
 .../built-in/multimodal/perf_test_onnx.py     |  94 -------------
 .../built-in/multimodal/precision_test.py     | 130 ------------------
 3 files changed, 319 deletions(-)
 delete mode 100644 MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
 delete mode 100644 MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
 delete mode 100644 MindIE/MindIE-Torch/built-in/multimodal/precision_test.py

diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
deleted file mode 100644
index ceff8b8648..0000000000
--- a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_aie.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import json
-import logging
-import argparse
-import time
-import torch
-import mindietorch
-from transformers import AutoConfig
-
-logging.basicConfig(level=logging.INFO)
-
-
-def test(inputs, model, stream, meta=""):
-    # warmup
-    for _ in range(10):
-        with mindietorch.npu.stream(stream):
-            model(*inputs)
-        stream.synchronize()
-
-    # performance test
-    num_infer = 100
-    start = time.time()
-    for _ in range(num_infer):
-        with mindietorch.npu.stream(stream):
-            model(*inputs)
-        stream.synchronize()
-    end = time.time()
-
-    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
-    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
-
-
-def test_clip(args):
-    device = f'npu:{args.device_id}'
-    stream = mindietorch.npu.Stream(device)
-    if args.clip_aie_path.endswith(".ts"):
-        model = torch.jit.load(args.clip_aie_path)
-    else:
-        model = torch.load(args.clip_aie_path)
-    model.eval().to(device)
-    config = AutoConfig.from_pretrained(args.hf_model_path)
-
-    image_width = config.vision_config.image_size
-    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
-    text_input_shape = (args.text_batchsize, args.token_len)
-    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
-    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
-    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
-    inputs = [input_ids, input_img, attention_mask]
-
-    test(inputs, model, stream, "CLIP")
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--device-id", type=int, help="NPU device id", default=0)
-    parser.add_argument(
-        "--clip-aie-path",
-        type=str,
-        default="/Path/to/compiled/aie_or_ts_model"
-    )
-    parser.add_argument(
-        "--hf-model-path",
-        default="/Path/to/Huggingface_model_path",
-        type=str,
-        help="Huggingface CLIP Model Path."
-    )
-    parser.add_argument("--text-batchsize", type=int, default=80)
-    parser.add_argument("--image-batchsize", type=int, default=1)
-    parser.add_argument("--token-len", type=int, default=52)
-
-    return parser.parse_args()
-
-
-def main():
-    perf_args = parse_args()
-    mindietorch.set_device(perf_args.device_id)
-    test_clip(perf_args)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py b/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
deleted file mode 100644
index 3dbc7f9d67..0000000000
--- a/MindIE/MindIE-Torch/built-in/multimodal/perf_test_onnx.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import json
-import logging
-import argparse
-import time
-import torch
-import onnxruntime as ort
-from transformers import AutoConfig
-
-logging.basicConfig(level=logging.INFO)
-
-
-def test(encoder_path, provider, output_names, onnx_inputs, meta=""):
-    onnx_model = ort.InferenceSession(
-        encoder_path,
-        providers=[provider]
-    )
-
-    # warmup
-    for _ in range(10):
-        onnx_model.run(output_names, onnx_inputs)
-    # performance test
-    num_infer = 100
-    start = time.time()
-    for _ in range(num_infer):
-        onnx_model.run(output_names, onnx_inputs)
-    end = time.time()
-
-    logging.info("%s latency: %.2f ms", meta, (end - start) / num_infer * 1000)
-    logging.info("%s throughput: %.2f fps", meta, num_infer / (end - start))
-
-
-def test_clip(args, provider):
-    config = AutoConfig.from_pretrained(args.hf_model_path)
-
-    image_width = config.vision_config.image_size
-    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
-    text_input_shape = (args.text_batchsize, args.token_len)
-    input_img = torch.randn(img_input_shape, dtype=torch.float32).detach().numpy()
-    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).detach().numpy()
-    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).detach().numpy()
-
-    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
-    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
-
-    test(args.onnx_path, provider, output_names, onnx_inputs, "CLIP")
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--onnx-path",
-        type=str,
-        default="/Path/to/onnx_model"
-    )
-    parser.add_argument(
-        "--hf-model-path",
-        default="/Path/to/Huggingface_model_path",
-        type=str,
-        help="Huggingface CLIP Model Path."
-    )
-    parser.add_argument("--text-batchsize", type=int, default=80)
-    parser.add_argument("--image-batchsize", type=int, default=1)
-    parser.add_argument("--token-len", type=int, default=52)
-    parser.add_argument("--use-gpu", action="store_true")
-
-    return parser.parse_args()
-
-
-def main():
-    perf_args = parse_args()
-    if perf_args.use_gpu:
-        provider = "CUDAExecutionProvider"
-    else:
-        provider = "CPUExecutionProvider"
-
-    test_clip(perf_args, provider)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py b/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
deleted file mode 100644
index 6995367ed9..0000000000
--- a/MindIE/MindIE-Torch/built-in/multimodal/precision_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import json
-import logging
-import argparse
-import torch
-import mindietorch
-import torch
-import onnxruntime as ort
-import numpy as np
-import torch.nn.functional as F
-from transformers import AutoConfig
-
-logging.basicConfig(level=logging.INFO)
-
-
-def compare_onnx_aie_output(onnx_out, aie_out, sim_threshold=0.99):
-    num_sim = 0
-    for i, (a, b) in enumerate(zip(onnx_out, aie_out)):
-        a = a.reshape(1, -1).astype(np.float32)
-        b = b.reshape(1, -1)
-        sim = F.cosine_similarity(torch.from_numpy(a), b, dim=1)
-        if sim > sim_threshold:
-            num_sim += 1
-        else:
-            logging.info('Output %d similarity: %f', i, sim)
-
-    logging.info('Number of outputs to compare: %d', len(onnx_out))
-    logging.info('Number of outputs with cosine similarity > %.2f: %d', sim_threshold, num_sim)
-
-
-def compare(args):
-    # MindIETorch
-    device = f'npu:{args.device_id}'
-    stream = mindietorch.npu.Stream(device)
-
-    if args.clip_aie_path.endswith(".ts"):
-        aie_model = torch.jit.load(args.clip_aie_path)
-    else:
-        aie_model = torch.load(args.clip_aie_path)
-    aie_model.eval().to(device)
-    config = AutoConfig.from_pretrained(args.hf_model_path)
-
-    image_width = config.vision_config.image_size
-    img_input_shape = (args.image_batchsize, 3, image_width, image_width)
-    text_input_shape = (args.text_batchsize, args.token_len)
-    input_img = torch.randn(img_input_shape, dtype=torch.float32).to(device)
-    input_ids = torch.randint(high=1000, size=text_input_shape, dtype=torch.int32).to(device)
-    attention_mask = torch.ones(text_input_shape, dtype=torch.int32).to(device)
-    inputs = [input_ids, input_img, attention_mask]
-
-    with mindietorch.npu.stream(stream):
-        aie_out = aie_model(*inputs)
-    stream.synchronize()
-
-    if isinstance(aie_out, tuple) or isinstance(aie_out, list):
-        aie_out = (x.cpu() for x in aie_out)
-    else:
-        aie_out = aie_out.cpu()
-
-    # ONNX
-    input_img = input_img.cpu().detach().numpy()
-    input_ids = input_ids.cpu().detach().numpy()
-    attention_mask = attention_mask.cpu().detach().numpy()
-
-    if args.use_gpu:
-        provider = "CUDAExecutionProvider"
-    else:
-        provider = "CPUExecutionProvider"
-
-    onnx_model = ort.InferenceSession(
-        args.clip_onnx_path,
-        providers=[provider]
-    )
-    onnx_inputs = {"input_ids": input_ids, "pixel_values": input_img, "attention_mask": attention_mask}
-    output_names = ["image_embeds", "text_embeds", "logits_per_text", "logits_per_image"]
-    onnx_out = onnx_model.run(output_names, onnx_inputs)
-
-    compare_onnx_aie_output(onnx_out, aie_out, args.sim_threshold)
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--device-id", type=int, default=0, help="NPU device id")
-    parser.add_argument(
-        "--clip-aie-path",
-        type=str,
-        default="/Path/to/compiled/aie_or_ts_model"
-    )
-    parser.add_argument(
-        "--clip-onnx-path",
-        type=str,
-        default="/Path/to/onnx_model"
-    )
-    parser.add_argument(
-        "--hf-model-path",
-        default="/Path/to/Huggingface_model_path",
-        type=str,
-        help="Huggingface CLIP Model Path."
-    )
-    parser.add_argument("--text-batchsize", type=int, default=80)
-    parser.add_argument("--image-batchsize", type=int, default=1)
-    parser.add_argument("--token-len", type=int, default=52)
-    parser.add_argument('--sim-threshold', type=float, default=0.99)
-    parser.add_argument("--use-gpu", action="store_true")
-
-    return parser.parse_args()
-
-
-def main():
-    compare_args = parse_args()
-    mindietorch.set_device(compare_args.device_id)
-    logging.info('=== Compare the outputs of ONNX and AIE ===')
-    compare(compare_args)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
-- 
Gitee