diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md index 9d2618ebf06d04d6fc179cd22f1ed04e6e39d0fe..e3d5cafbe8d26f37f43144049d41613ac5b2fba8 100644 --- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/README.md @@ -102,7 +102,9 @@ python3 adaptnoflashencoder.py生成no_flash_encoder_revise.onnx | 模型 | 官网pth精度 | 710/310离线推理精度 | gpu性能 | 710性能 | 310性能 | | :---: | :----------------------------: | :-------------------------: | :-----: | :-----: | ------- | -| wenet | GPU流式:5.94%, 非流式:4.64% | 流式:5.66%, 非流式:5.66% | 66fps | 5.8fps | 11.6fps | +| wenet | GPU流式:5.94%, 非流式:4.64% | 流式:5.66%, 非流式:5.66% | 66fps | 7.69fps | 11.6fps | + +生成的t1.json, t2.json文件中分别为encoder, decoder耗时;运行python3.7.5 infer_perf.py即可将其相加并计算fps 静态shape场景(仅支持非流式场景): diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/get_no_flash_encoder_out.diff b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/get_no_flash_encoder_out.diff index 81924cf956bd0b4dc74061466101d419c639eee7..b209edb704a1cc238363120b779cbf3dc2556058 100644 --- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/get_no_flash_encoder_out.diff +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/get_no_flash_encoder_out.diff @@ -32,7 +32,7 @@ index 73990fa..e2f3555 100644 + y, exe_time = encoder_model_noflash( + [speech.numpy(), speech_lengths.numpy().astype("int32")]) # (beam_size, max_hyps_len, vocab_size) + encoder_out, encoder_mask = torch.from_numpy(y[0]), torch.from_numpy(y[1]) -+ return encoder_out, encoder_mask ++ return encoder_out, encoder_mask, exe_time def recognize( self, diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/getwer.diff b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/getwer.diff index a40643cc7a192ff0d4090319d78279ce4921477a..633513671cfe36099d7139f6f2245c7511d39b69 100644 --- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/getwer.diff +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/getwer.diff @@ 
-167,7 +167,7 @@ index 73990fa..82337ca 100644 + if score > best_score: + best_score = score + best_index = i -+ return hyps[best_index][0] ++ return hyps[best_index][0], exe_time + def attention_rescoring( self, diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/infer_perf.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/infer_perf.py new file mode 100644 index 0000000000000000000000000000000000000000..acff85e65551425bc30948b145bc7d209f5e3b7f --- /dev/null +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/infer_perf.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +t1 = {} +with open("t1.json", 'r') as load_f: + t1 = json.load(load_f) + +t2 = {} +with open("t2.json", 'r') as load_f: + t2 = json.load(load_f) + +perf = t1["t1"] + t2["t2"] +print("fps:", 1000 / perf) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py index 9c92947c8300805a171039a7b9a4bb91b3acf0d0..709d6f199db3fd81d919030a6bccd2f85a6e35b7 100644 --- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/process_encoder_data_noflash.py @@ -174,7 +174,7 @@ if __name__ == '__main__': #init acl if os.path.exists(args.json_path): os.remove(args.json_path) - + total_t = 0 encoder_dic = {} import time for batch_idx, batch in enumerate(test_data_loader): @@ -185,7 +185,7 @@ if __name__ == '__main__': feats_lengths = feats_lengths.to(device) target_lengths = target_lengths.to(device) assert (feats.size(0) == 1) - encoder_out, encoder_mask = model.get_no_flash_encoder_out( + encoder_out, encoder_mask, exe_time = model.get_no_flash_encoder_out( encoder_model_noflash, batch_idx, feats, @@ -196,11 +196,14 @@ if __name__ == '__main__': ctc_weight=args.ctc_weight, simulate_streaming=args.simulate_streaming, reverse_weight=args.reverse_weight) - + total_t += exe_time encoder_dic["encoder_out_"+ str(batch_idx)] = [encoder_out.shape[0], encoder_out.shape[1],encoder_out.shape[2]] encoder_dic["encoder_mask_"+ str(batch_idx)] = [encoder_mask.shape[0], encoder_mask.shape[1],encoder_mask.shape[2]] encoder_out.numpy().tofile(os.path.join(args.bin_path, "encoder_out_{}.bin".format(batch_idx))) encoder_mask.numpy().tofile(os.path.join(args.bin_path, "encoder_mask_{}.bin".format(batch_idx))) - + ave_t = total_t / (batch_idx + 1) + dic_perf = {} + dic_perf["t1"] = ave_t + dic2json(dic_perf, "t1.json") dic2json(encoder_dic, args.json_path) diff --git 
a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py index 37ce531b198b9d077669ba0ef21c4fc9d29f590e..bdc02ad3a406b7718a37e2747f581a7dcb549d3d 100644 --- a/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py +++ b/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch/recognize_attenstion_rescoring.py @@ -52,6 +52,12 @@ import acl from wenet.transformer.acl_net import Net import json import os + +def dic2json(input_dict, json_path): + json_str = json.dumps(input_dict) + with open(json_path, 'a') as json_file: + json_file.write(json_str) + if __name__ == '__main__': parser = argparse.ArgumentParser(description='recognize with your model') parser.add_argument('--config', required=True, help='config file') @@ -149,7 +155,7 @@ if __name__ == '__main__': model = model.to(device) model.eval() - + total_t = 0 #init acl ret = acl.init() device_id = 0 @@ -169,7 +175,7 @@ if __name__ == '__main__': feats_lengths = feats_lengths.to(device) target_lengths = target_lengths.to(device) assert (feats.size(0) == 1) - hyp = model.get_wer( + hyp, exe_time = model.get_wer( batch_idx, bin_path, json_data, @@ -182,6 +188,7 @@ if __name__ == '__main__': ctc_weight=args.ctc_weight, simulate_streaming=args.simulate_streaming, reverse_weight=args.reverse_weight) + total_t += exe_time hyps = [hyp] for i, key in enumerate(keys): content = '' @@ -191,3 +198,8 @@ if __name__ == '__main__': content += char_dict[w] logging.info('{} {}'.format(key, content)) fout.write('{} {}\n'.format(key, content)) + ave_t = total_t / (batch_idx + 1) + dic_perf = {} + dic_perf["t2"] = ave_t + if "no" in args.bin_path: + dic2json(dic_perf, "t2.json")