diff --git a/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/README.md b/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/README.md index 2e1dcb199d8093c7f0a362449cd5fa44f7873e16..0e91701736fb333e23ffd9503738186b639aacd0 100644 --- a/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/README.md +++ b/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/README.md @@ -31,7 +31,7 @@ | CANN | 8.1.RC1 | 包含kernels包和toolkit包 | | Python | 3.10 | - | | PyTorch | 2.5.1 | - | - | Ascend Extension PyTorch | 2.5.1.post2 | - | + | Ascend Extension PyTorch | 2.5.1 | - | | 说明:Atlas 800I A2 推理卡和Atlas 300I DUO 推理卡请以CANN版本选择实际固件与驱动版本。 | \ | \ | @@ -80,7 +80,7 @@ 2. 安装依赖 ``` - pip3 install transformers==4.35.2 + pip3 install transformers==4.35.2 torch==2.5.1 torch_npu==2.5.1 protobuf numpy==1.26.4 decorator attrs psutil scipy ``` diff --git a/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/infer.py b/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/infer.py index d21a875ad845a80f5ac5445ef57982a692295da1..76d8b0f25d9cbc0995856f6070099e313bc1eacc 100644 --- a/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/infer.py +++ b/ACL_PyTorch/built-in/embedding/jina-embeddings-v2-base-zh/infer.py @@ -127,14 +127,15 @@ def rewrite_JinaBertGLUMLP_forward(model): def forward(hidden_states: torch.Tensor) -> torch.Tensor: residual_connection = hidden_states # compute the activation - hidden_states = self.gated_layers(hidden_states) + hidden_states = model.gated_layers(hidden_states) gated, non_gated = hidden_states.chunk(2, dim=2) - hidden_states = self.act(gated) * non_gated - hidden_states = self.dropout(hidden_states) + hidden_states = model.act(gated) * non_gated + hidden_states = model.dropout(hidden_states) # multiply by the second matrix - hidden_states = self.wo(hidden_states) + hidden_states = model.wo(hidden_states) # add the residual connection and post-LN - hidden_states = self.layernorm(hidden_states + residual_connection) + hidden_states = model.layernorm(hidden_states + residual_connection) + return hidden_states model.forward = forward @@ -147,7 +148,7 @@ def modify_model(model): model.npu().eval().half() -if name == '__main__': +if __name__ == '__main__': args = parse_args() torch_npu.npu.set_compile_mode(jit_compile=False) @@ -179,4 +180,4 @@ if name == '__main__': ['How is the weather today?', '今天天气怎么样?', ], convert_to_tensor=True) - print(f'E2E time = {(time.time() - start) / args.loop *1000}ms') + print(f'E2E time = {(time.time() - start) / args.loop *1000}ms') \ No newline at end of file