From 8e59f2b482c25608e21ca51ae57d8d8bfa249f23 Mon Sep 17 00:00:00 2001
From: Lanxi
Date: Wed, 20 Mar 2024 15:22:00 +0800
Subject: [PATCH 1/4] documents and scripts for conformer

---
 .../audio/ConformerOfflineModel/data_gen.py   |   8 +
 .../modify_decoder_onnx.py                    |  35 ++++
 .../modify_encoder_onnx.py                    |  35 ++++
 .../ConformerOfflineModel/precision_test.py   |  66 +++++++
 .../audio/ConformerOfflineModel/readme.md     | 179 ++++++++++++++++++
 5 files changed, 323 insertions(+)
 create mode 100644 ACL_PyTorch/built-in/audio/ConformerOfflineModel/data_gen.py
 create mode 100644 ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_decoder_onnx.py
 create mode 100644 ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_encoder_onnx.py
 create mode 100644 ACL_PyTorch/built-in/audio/ConformerOfflineModel/precision_test.py
 create mode 100644 ACL_PyTorch/built-in/audio/ConformerOfflineModel/readme.md

diff --git a/ACL_PyTorch/built-in/audio/ConformerOfflineModel/data_gen.py b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/data_gen.py
new file mode 100644
index 0000000000..86ee1e3935
--- /dev/null
+++ b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/data_gen.py
@@ -0,0 +1,8 @@
+import numpy as np
+
+# Generate two dummy input arrays and save them as .npy files:
+# x has shape (1, 100, 80) in float32, x_lens has shape (1,) in int64.
+x = np.ones((1, 100, 80), dtype=np.float32)
+x_lens = np.array([100], dtype=np.int64)
+np.save('x.npy', x)
+np.save('x_lens.npy', x_lens)
diff --git a/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_decoder_onnx.py b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_decoder_onnx.py
new file mode 100644
index 0000000000..c972709d20
--- /dev/null
+++ b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_decoder_onnx.py
@@ -0,0 +1,35 @@
+# Copyright(C) 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from auto_optimizer import OnnxGraph
+
+
+def main():
+    onnx_path = sys.argv[1]
+    graph = OnnxGraph.parse(onnx_path)
+    graph.remove("/decoder/Clip")  # drop the Clip node from the decoder graph
+    gather = graph["/decoder/embedding/Gather"]
+    gather.inputs[1] = "y"  # rewire the embedding Gather to read the graph input "y" directly
+    graph.update_map()
+    graph.infershape()
+
+    g_sim = graph.simplify()
+    save_path = onnx_path.replace(".onnx", "_modified.onnx")
+    g_sim.save(save_path)
+    print("Modified model saved to ", save_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_encoder_onnx.py b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_encoder_onnx.py
new file mode 100644
index 0000000000..f9aef35d11
--- /dev/null
+++ b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/modify_encoder_onnx.py
@@ -0,0 +1,35 @@
+# Copyright(C) 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from auto_optimizer import OnnxGraph
+
+
+def main():
+    onnx_path = sys.argv[1]
+    graph = OnnxGraph.parse(onnx_path)
+    reduce_max = graph.get_nodes('ReduceMax')[0]
+    reduce_max.attrs['axes'] = [0]  # constrain the first ReduceMax node to reduce over axis 0
+
+    graph.update_map()
+    graph.infershape()
+
+    g_sim = graph.simplify()
+    save_path = onnx_path.replace(".onnx", "_modified.onnx")
+    g_sim.save(save_path)
+    print("Modified model saved to ", save_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ACL_PyTorch/built-in/audio/ConformerOfflineModel/precision_test.py b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/precision_test.py
new file mode 100644
index 0000000000..26ae8bd5d4
--- /dev/null
+++ b/ACL_PyTorch/built-in/audio/ConformerOfflineModel/precision_test.py
@@ -0,0 +1,66 @@
+import sys
+
+import numpy as np
+from pruned_transducer_stateless5.onnx_pretrained import OnnxModel
+from ais_bench.infer.interface import InferSession
+
+import torch
+from torch.nn.functional import cosine_similarity
+
+# Reference ONNX models (encoder/decoder/joiner) used as the precision baseline
+onnxmodel = OnnxModel("./exp/encoder-epoch-99-avg-1.onnx", "./exp/decoder-epoch-99-avg-1.onnx", "./exp/joiner-epoch-99-avg-1.onnx")
+
+def is_close_to_ones(x1, atol=1e-04):
+    x2 = torch.ones_like(x1)
+    return torch.allclose(x1, x2, atol=atol)  # every element within atol of 1.0
+
+def precision_test(om_output, onnx_output):
+    result = is_close_to_ones(cosine_similarity(om_output, onnx_output))
+    print("Precision test passed" if result else "Precision test failed")
+
+def run_infer_session(session, inputs, custom_sizes=None):
+    if custom_sizes is not None:
+        return session.infer(inputs, 'dymshape', custom_sizes=custom_sizes)
+    else:
+        return session.infer(inputs)
+
+def evaluate_model(mode, om_path):
+    session = InferSession(0, om_path)
+
+    if mode == 'encoder':
+        x, x_lens = np.random.rand(1, 100, 80).astype(np.float32), np.array([100], dtype=np.int64)
+        output_size = 100000  # output buffer size for dynamic-shape ("dymshape") inference
+        om_outputs = run_infer_session(session, [x, x_lens], custom_sizes=output_size)
+
+        x_tensor, x_lens_tensor = torch.from_numpy(x), torch.from_numpy(x_lens)
+        onnx_output, _ = onnxmodel.run_encoder(x_tensor, x_lens_tensor)
+
+    elif mode == 'decoder':
+        y = np.random.randint(0, 10, size=(1, 2)).astype(np.int64)
+        om_outputs = run_infer_session(session, [y])
+
+        y_tensor = torch.from_numpy(y)
+        onnx_output = onnxmodel.run_decoder(y_tensor)
+
+    elif mode == 'joiner':
+        enc, dec = np.random.rand(1, 512).astype(np.float32), np.random.rand(1, 512).astype(np.float32)
+        om_outputs = run_infer_session(session, [enc, dec])
+
+        enc_tensor, dec_tensor = torch.from_numpy(enc), torch.from_numpy(dec)
+        onnx_output = onnxmodel.run_joiner(enc_tensor, dec_tensor)
+
+    else:
+        raise ValueError("Invalid mode")
+
+    om_output = torch.from_numpy(om_outputs[0])
+    precision_test(om_output, onnx_output)
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: