From 56b5a0567e4ab8b4bf009c896cf59349c12a8902 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Wed, 5 Feb 2025 18:01:52 +0800
Subject: [PATCH 01/18] add README.md

---
 .../DeepSeek-R1-Distill-Qwen-1.5B/README.md   | 205 +++++++++++++++++
 .../DeepSeek-R1-Distill-Qwen-7B/README.md     | 206 ++++++++++++++++++
 2 files changed, 411 insertions(+)
 create mode 100644 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
 create mode 100644 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
new file mode 100644
index 0000000000..c2ae4cf0a2
--- /dev/null
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -0,0 +1,205 @@
+
+# DeepseekR1
+
+## Usage
+
+We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model.
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main)
+
+**权重转换**
+由于提供的是.safetensors权重,无需转换,可以直接使用。
+
+## 加载镜像
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配本模型的镜像包:1.0.0-800I-A2-py311-openeuler24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+
+镜像加载完成后,请使用`docker images`命令确认具体的镜像名称与标签。
+```shell
+docker load -i mindie:1.0.0-800I-A2-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+or
+```shell
+docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+
+## 约束条件
+- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台800I A2 32G服务器或1台300I DUO服务器
+- 在300I DUO服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 当前支持TP=1/2/4/8推理
+
+## 新建容器
+
+目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Qwen-1.5B模型推理脚本,无需再额外下载魔乐仓库承载的模型适配代码,直接新建容器即可。
+
+执行以下启动命令(参考):
+如果您使用的是root用户镜像(例如从Ascend Hub上取得),并且可以使用特权容器,请使用以下命令启动容器:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --privileged \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+
+如果您希望使用自行构建的普通用户镜像,并且规避容器相关权限风险,可以使用以下命令指定用户与设备:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --user mindieuser: \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    --device=/dev/davinci0 \
+    --device=/dev/davinci1 \
+    --device=/dev/davinci2 \
+    --device=/dev/davinci3 \
+    --device=/dev/davinci4 \
+    --device=/dev/davinci5 \
+    --device=/dev/davinci6 \
+    --device=/dev/davinci7 \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+> 注意,以上启动命令仅供参考,请根据需求自行修改再启动容器,尤其需要注意:
+>
+> 1. `--user`,如果您的环境中HDK是通过普通用户安装(例如默认的`HwHiAiUser`,可以通过`id HwHiAiUser`命令查看该用户组ID),请设置好对应的用户组,例如用户组1001可以使用HDK,则`--user mindieuser:1001`,镜像中默认使用的是用户组1000。如果您的HDK是由root用户安装,且指定了`--install-for-all`参数,则无需指定`--user`参数。
+>
+> 2. 设定容器名称`--name`与镜像名称,800I A2和300I DUO各自使用对应版本的镜像,例如800I A2服务器使用`mindie:1.0.0-py3.11-800I-A2-aarch64-Ubuntu22.04`。
+>
+> 3. 设定想要使用的卡号`--device`。
+>
+> 4. 
设定权重挂载的路径,`-v /path-to-weights:/path-to-weights:ro`,注意,如果使用普通用户镜像,权重路径所属应为镜像内默认的1000用户,且权限可设置为750。可使用以下命令进行修改: +> ```sh +> chown -R 1000:1000 /path-to-weights +> chmod -R 755 /path-to-weights +> ``` +> 5. **在普通用户镜像中,注意所有文件均在 `/home/mindieuser` 下,请勿直接挂载 `/home` 目录,以免宿主机上存在相同目录,将容器内文件覆盖清除。** + +## 进入容器 +```shell +docker exec -it ${容器名称} bash +``` + +## 纯模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +torchrun --nproc_per_node 2 \ + --master_port 20037 \ + -m examples.run_pa \ + --model_path {权重路径} \ + --max_output_length 20 +``` + +### 性能测试 +进入ModelTest路径 +```shell +cd $ATB_SPEED_HOME_PATH/tests/modeltest/ +``` +运行测试脚本 +```shell +bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length]) +``` +具体执行batch=1, 输入长度256, 输出长度256用例的2卡并行性能测试命令为: +```shell +bash run.sh pa_bf16 performance [[256,256]] 1 qwen ${weight_path} 2 +``` + +> 注:ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md` +## 服务化推理 + + +- 打开配置文件 + +```shell +vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json +``` + +- 更改配置文件 + +```json +{ +... +"ServerConfig" : +{ +... +"port" : 1040, #自定义 +"managementPort" : 1041, #自定义 +"metricsPort" : 1042, #自定义 +... +"httpsEnabled" : false, +... +}, + +"BackendConfig": { +... +"npuDeviceIds" : [[0,1]], +... +"ModelDeployConfig": +{ +"truncation" : false, +"ModelConfig" : [ +{ +... +"modelName" : "qwen", +"modelWeightPath" : "/data/datasets/DeepSeek-R1-Distill-Qwen-1.5B", +"worldSize" : 2, +... +} +] +}, +} +} +``` + +- 拉起服务化 + +```shell +cd /usr/local/Ascend/mindie/latest/mindie-service/bin +./mindieservice_daemon +``` + +- 新建窗口测试(VLLM接口) + +```shell +curl 127.0.0.1:1040/generate -d '{ +"prompt": "What's deep learning?", +"max_tokens": 32, +"stream": false, +"do_sample":true, +"repetition_penalty": 1.00, +"temperature": 0.01, +"top_p": 0.001, +"top_k": 1, +"model": "qwen" +}' +``` + +> 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html) + +## 常见问题 +1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。 + +```shell +pip install transformers==4.46.3 --force-reinstall +pip install numpy==1.26.4 --force-reinstall +``` \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md new file mode 100644 index 0000000000..48a577d521 --- /dev/null +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -0,0 +1,206 @@ + +# DeepseekR1 + +## Usage + +We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. 
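+
+下文「权重」一节给出了权重下载链接。若希望直接在命令行完成下载,可参考以下示意命令(假设环境中已安装 git 与 git-lfs 且可访问 huggingface.co;`/path-to-weights` 为本文档使用的示例路径,请按实际环境调整):
+
+```shell
+# 示意:从 Hugging Face 拉取 DeepSeek-R1-Distill-Qwen-7B 权重到示例目录
+git lfs install
+git clone https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B /path-to-weights/DeepSeek-R1-Distill-Qwen-7B
+```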
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main)
+
+
+**权重转换**
+由于提供的是.safetensors权重,无需转换,可以直接使用。
+
+## 加载镜像
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配本模型的镜像包:1.0.0-800I-A2-py311-openeuler24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+
+镜像加载完成后,请使用`docker images`命令确认具体的镜像名称与标签。
+```shell
+docker load -i mindie:1.0.0-800I-A2-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+or
+```shell
+docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+
+## 约束条件
+- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台800I A2 32G服务器或1台300I DUO服务器
+- 在300I DUO服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 当前支持TP=1/2/4/8推理
+
+## 新建容器
+
+目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Qwen-7B模型推理脚本,无需再额外下载魔乐仓库承载的模型适配代码,直接新建容器即可。
+
+执行以下启动命令(参考):
+如果您使用的是root用户镜像(例如从Ascend Hub上取得),并且可以使用特权容器,请使用以下命令启动容器:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --privileged \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+
+如果您希望使用自行构建的普通用户镜像,并且规避容器相关权限风险,可以使用以下命令指定用户与设备:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --user mindieuser: \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    --device=/dev/davinci0 \
+    --device=/dev/davinci1 \
+    --device=/dev/davinci2 \
+    --device=/dev/davinci3 \
+    --device=/dev/davinci4 \
+    --device=/dev/davinci5 \
+    --device=/dev/davinci6 \
+    --device=/dev/davinci7 \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+> 注意,以上启动命令仅供参考,请根据需求自行修改再启动容器,尤其需要注意:
+>
+> 1. `--user`,如果您的环境中HDK是通过普通用户安装(例如默认的`HwHiAiUser`,可以通过`id HwHiAiUser`命令查看该用户组ID),请设置好对应的用户组,例如用户组1001可以使用HDK,则`--user mindieuser:1001`,镜像中默认使用的是用户组1000。如果您的HDK是由root用户安装,且指定了`--install-for-all`参数,则无需指定`--user`参数。
+>
+> 2. 设定容器名称`--name`与镜像名称,800I A2和300I DUO各自使用对应版本的镜像,例如800I A2服务器使用`mindie:1.0.0-py3.11-800I-A2-aarch64-Ubuntu22.04`。
+>
+> 3. 设定想要使用的卡号`--device`。
+>
+> 4. 设定权重挂载的路径,`-v /path-to-weights:/path-to-weights:ro`,注意,如果使用普通用户镜像,权重路径所属应为镜像内默认的1000用户,且权限可设置为750。可使用以下命令进行修改:
+> ```sh
+> chown -R 1000:1000 /path-to-weights
+> chmod -R 750 /path-to-weights
+> ```
+> 5. 
**在普通用户镜像中,注意所有文件均在 `/home/mindieuser` 下,请勿直接挂载 `/home` 目录,以免宿主机上存在相同目录,将容器内文件覆盖清除。** + +## 进入容器 +```shell +docker exec -it ${容器名称} bash +``` + +## 纯模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +torchrun --nproc_per_node 2 \ + --master_port 20037 \ + -m examples.run_pa \ + --model_path {权重路径} \ + --max_output_length 20 +``` + +### 性能测试 +进入ModelTest路径 +```shell +cd $ATB_SPEED_HOME_PATH/tests/modeltest/ +``` +运行测试脚本 +```shell +bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length]) +``` +具体执行batch=1, 输入长度256, 输出长度256用例的2卡并行性能测试命令为: +```shell +bash run.sh pa_bf16 performance [[256,256]] 1 qwen ${weight_path} 2 +``` + +> 注:ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md` +## 服务化推理 + + +- 打开配置文件 + +```shell +vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json +``` + +- 更改配置文件 + +```json +{ +... +"ServerConfig" : +{ +... +"port" : 1040, #自定义 +"managementPort" : 1041, #自定义 +"metricsPort" : 1042, #自定义 +... +"httpsEnabled" : false, +... +}, + +"BackendConfig": { +... +"npuDeviceIds" : [[0,1]], +... +"ModelDeployConfig": +{ +"truncation" : false, +"ModelConfig" : [ +{ +... +"modelName" : "qwen", +"modelWeightPath" : "/data/datasets/DeepSeek-R1-Distill-Qwen-7B", +"worldSize" : 2, +... +} +] +}, +} +} +``` + +- 拉起服务化 + +```shell +cd /usr/local/Ascend/mindie/latest/mindie-service/bin +./mindieservice_daemon +``` + +- 新建窗口测试(VLLM接口) + +```shell +curl 127.0.0.1:1040/generate -d '{ +"prompt": "What's deep learning?", +"max_tokens": 32, +"stream": false, +"do_sample":true, +"repetition_penalty": 1.00, +"temperature": 0.01, +"top_p": 0.001, +"top_k": 1, +"model": "qwen" +}' +``` + +> 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html) + +## 常见问题 +1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 
降低transformers版本可解决。 + +```shell +pip install transformers==4.46.3 --force-reinstall +pip install numpy==1.26.4 --force-reinstall +``` \ No newline at end of file -- Gitee From b0740f940b48fd63478f738a3880fef6c42578ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 09:38:37 +0800 Subject: [PATCH 02/18] update readme.md --- MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md | 4 ++-- MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 6 +++--- MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md index b1789e8e3e..1e9021ddee 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md @@ -11,7 +11,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa - [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/tree/main) -### 加载镜像 +## 加载镜像 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包:1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 @@ -21,7 +21,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 -### 新建容器 +## 新建容器 目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Llama-70B模型推理脚本,无需再额外下载模型适配代码,直接新建容器即可。 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md index 83caa1e4b9..d10cff9003 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md @@ -11,7 +11,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa - [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/tree/main) -### 加载镜像 +## 加载镜像 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-8B的镜像包:1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 @@ -20,7 +20,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa * 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -### 新建容器 +## 新建容器 目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Llama-8B模型推理脚本,无需再额外下载模型适配代码,直接新建容器即可。 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 929e43b582..462f9b4fd0 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -1,5 +1,5 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-1.5B ## Usage @@ -169,8 +169,8 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +curl 127.0.0.1:1025/generate -d '{ +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md index 
33543fcdb2..316dacd025 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -1,5 +1,5 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-7B ## Usage @@ -170,8 +170,8 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +curl 127.0.0.1:1025/generate -d '{ +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, -- Gitee From 5746765e1f6f1320bd4fa41a0c8463dec97bd11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 09:39:34 +0800 Subject: [PATCH 03/18] update readme.md --- MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 462f9b4fd0..a6cd7f73ee 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -169,7 +169,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1025/generate -d '{ +curl 127.0.0.1:1040/generate -d '{ "prompt": "What is deep learning?", "max_tokens": 32, "stream": false, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md index 316dacd025..3022fc4e13 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -170,7 +170,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1025/generate -d '{ +curl 127.0.0.1:1040/generate -d '{ "prompt": "What is deep learning?", "max_tokens": 32, "stream": false, -- Gitee From 136e81fdf941fc0236be90b11b23f5b7034c8d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:15:11 +0800 Subject: [PATCH 04/18] =?UTF-8?q?=E6=9B=B4=E6=96=B0readme=E7=9A=84?= =?UTF-8?q?=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- .../DeepSeek-R1-Distill-Llama-8B/README.md | 4 +++- .../DeepSeek-R1-Distill-Qwen-1.5B/README.md | 4 ++-- .../DeepSeek-R1-Distill-Qwen-14B/README.md | 10 +++++----- .../DeepSeek-R1-Distill-Qwen-32B/README.md | 8 ++++---- .../DeepSeek-R1-Distill-Qwen-7B/README.md | 4 ++-- 6 files changed, 17 insertions(+), 15 deletions(-) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-70B/README.md (98%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B/README.md (95%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B/README.md (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-14B/README.md (90%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-32B/README.md (92%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B/README.md (94%) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md similarity index 98% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 1e9021ddee..9b1979b3cf 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -19,7 +19,7 @@ Using 
the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-70B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* 支持TP=8推理 +* Atlas 800I A2 32G服务器支持TP=8推理; Atlas 300I Duo服务器支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md similarity index 95% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index d10cff9003..80e222381f 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -3,7 +3,8 @@ ## Usage -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. + + ## 权重 @@ -19,6 +20,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index a6cd7f73ee..90c4ab123e 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -3,7 +3,7 @@ ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. ## 权重 @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md similarity index 90% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index b24536f9cc..2230270135 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -1,9 +1,9 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-14B ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-14B is one of them. 
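+
+后文「约束条件」要求:在使用Atlas 300I DUO推理卡部署时,须将权重目录下`config.json`的`"torch_dtype"`字段改为`"float16"`。除手动编辑外,也可参考以下示意脚本完成修改(假设权重位于示例路径`/path-to-weights`,请按实际目录调整):
+
+```shell
+# 示意:将权重目录下 config.json 的 torch_dtype 字段改写为 float16
+python3 - <<'EOF'
+import json
+
+path = "/path-to-weights/config.json"  # 示例路径,请替换为实际权重目录
+with open(path, "r", encoding="utf-8") as f:
+    config = json.load(f)
+config["torch_dtype"] = "float16"
+with open(path, "w", encoding="utf-8") as f:
+    json.dump(config, f, indent=2, ensure_ascii=False)
+EOF
+```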
## 权重 @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=2/4/8推理; Atlas 300I Duo服务器支持TP=2/4推理 ## 新建容器 @@ -168,7 +168,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin ```shell curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md similarity index 92% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index a493ebd1bd..9dcd245611 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -1,9 +1,9 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-32B ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-32B is one of them. ## 权重 @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-32B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=4/8推理; Atlas 300I Duo服务器支持TP=4推理 ## 新建容器 @@ -168,7 +168,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin ```shell curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index 3022fc4e13..f2bf8db692 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -3,7 +3,7 @@ ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them. 
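+
+在执行后文「新建容器」步骤并通过`--device`挂载NPU卡之前,可先在宿主机上确认驱动与设备节点状态(示意命令,假设驱动与固件已正确安装,输出内容随驱动版本而异):
+
+```shell
+# 示意:查看 NPU 设备状态,以及 /dev 下可供 --device 挂载的设备节点
+npu-smi info
+ls /dev/davinci*
+```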
## 权重 @@ -30,7 +30,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 -- Gitee From 5b9b59fa374b8489ffe5e392f938f148fbf5a397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:26:24 +0800 Subject: [PATCH 05/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 9b1979b3cf..219695881a 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -17,9 +17,9 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* Atlas 800I A2 32G服务器支持TP=8推理; Atlas 300I Duo服务器支持TP=8推理 +* 支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index 80e222381f..e7076f47eb 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -18,9 +18,9 @@ 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +* 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 90c4ab123e..21f6f11d73 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +- 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index 2230270135..56c9452178 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md 
@@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=2/4/8推理; Atlas 300I Duo服务器支持TP=2/4推理 +- 支持TP=2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index 9dcd245611..a3225312cd 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-32B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=4/8推理; Atlas 300I Duo服务器支持TP=4推理 +- 支持TP=4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index f2bf8db692..e58bf31d41 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -28,9 +28,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +- 支持TP=1/2/4/8推理 ## 新建容器 -- Gitee From 011088a66ac95f59fa413c224e83c3acfba23ca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:33:02 +0800 Subject: [PATCH 06/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 219695881a..d3440f50cc 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -18,7 +18,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -* 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 21f6f11d73..f6f739fbb6 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -28,7 +28,7 @@ docker load -i 
mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index 56c9452178..c855b21687 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index a3225312cd..21bf588ccf 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index e58bf31d41..b3d515dbf4 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 ## 新建容器 -- Gitee From 0ad1dbb76a1fd4eb81137067a8da296d1898d91f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:39:07 +0800 Subject: [PATCH 07/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index d3440f50cc..c9e89f3e9c 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -17,7 +17,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +* 
部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插4张Atlas 300I DUO卡的服务器` * 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index e7076f47eb..efd4330d4a 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -18,8 +18,8 @@ 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -* 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` +* 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index f6f739fbb6..967fe1d157 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index c855b21687..3029ae6067 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=2/4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index 21bf588ccf..fdacc9f0e1 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插2张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index b3d515dbf4..dd96e2809b 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I 
DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 -- Gitee From 3b429462150f1d733e54a6d810fa72fcfbd34636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 11:07:05 +0800 Subject: [PATCH 08/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index efd4330d4a..f2e38076d1 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -3,7 +3,7 @@ ## Usage - +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. ## 权重 -- Gitee From 7d90d6a1ad378da317dc3b12613a538dee14602e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 11:08:15 +0800 Subject: [PATCH 09/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index f2e38076d1..2f0f50229f 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -5,7 +5,6 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. 
- ## 权重 **权重下载** -- Gitee From c72f9c05ea03b3af2c87026a5b9590ffe01b6794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Fri, 7 Feb 2025 09:14:55 +0800 Subject: [PATCH 10/18] =?UTF-8?q?=E6=96=B0=E5=A2=9Eqwen=E7=9A=84=E9=87=8F?= =?UTF-8?q?=E5=8C=96=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../README.md | 0 .../requirements.txt | 48 +-- .../README.md | 342 ++++++++--------- .../requirements.txt | 48 +-- .../DeepSeek-R1-Distill-Qwen-1.5B/README.md | 0 .../README.md | 350 +++++++++--------- .../requirements.txt | 48 +-- .../DeepSeek-R1-Distill-Qwen-7B/README.md | 17 + 8 files changed, 435 insertions(+), 418 deletions(-) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md (100%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md (96%) rename MindIE/LLM/{DeepSeek-R1-Distill-Qwen-7B-OrangePi => DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi}/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B/README.md (100%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md (96%) rename MindIE/LLM/{DeepSeek-R1-Distill-Qwen-1.5B-OrangePi => DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi}/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B/README.md (82%) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md similarity index 100% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt index b4ebc76e54..d3cf990cf3 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.3.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -decorator==5.1.1 -filelock==3.17.0 -fsspec==2025.2.0 -huggingface-hub==0.28.1 -idna==3.10 -Jinja2==3.1.5 -MarkupSafe==3.0.2 -mpmath==1.3.0 -networkx==3.4.2 -numpy==1.26.0 -packaging==24.2 -psutil==6.1.1 -PyYAML==6.0.2 -regex==2024.11.6 -requests==2.32.3 -safetensors==0.5.2 -scipy==1.15.1 -sympy==1.13.3 -tokenizers==0.20.3 -transformers==4.45.1 -typing_extensions==4.12.2 +attrs==24.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +decorator==5.1.1 +filelock==3.17.0 +fsspec==2025.2.0 +huggingface-hub==0.28.1 +idna==3.10 +Jinja2==3.1.5 +MarkupSafe==3.0.2 +mpmath==1.3.0 +networkx==3.4.2 +numpy==1.26.0 +packaging==24.2 +psutil==6.1.1 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +safetensors==0.5.2 +scipy==1.15.1 +sympy==1.13.3 +tokenizers==0.20.3 +transformers==4.45.1 +typing_extensions==4.12.2 urllib3==2.3.0 \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md similarity index 96% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md rename to 
MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md index 80e0dbeb2e..19808ec46e 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md @@ -1,171 +1,171 @@ -# DeepSeek-R1-Distill-Qwen-1.5B - -## Usage - -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. - -## 约束条件 -* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-1.5B模型 -* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 -* 由于此硬件为单卡,仅支持TP=1 - -## 权重 - -**权重下载** - -- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main) - -## 新建环境 - -### 1.1 安装CANN -- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) -- 安装顺序:先安装toolkit 再安装kernel - -#### 1.1.1 安装toolkit - -- 下载 - -| cpu | 包名(其中`${version}`为实际版本) | -| ------- | ------------------------------------------------ | -| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | - -- 安装 - ```bash - # 安装toolkit 以arm为例 - chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run - ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install - source /usr/local/Ascend/ascend-toolkit/set_env.sh - ``` - -#### 1.1.2 安装kernel - -- 下载 - -| 包名 | -| ------------------------------------------ | -| Ascend-cann-kernels*_${version}_linux.run | - - - 根据芯片型号选择对应的安装包 - -- 安装 - ```bash - chmod +x Ascend-cann-kernels-*_${version}_linux.run - ./Ascend-cann-kernels-*_${version}_linux.run --install - ``` - -#### 1.1.3 安装加速库 -- 下载加速库 - - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 - - | 包名(其中`${version}`为实际版本) | - | -------------------------------------------- | - | Ascend-cann-nnal_${version}_linux-aarch64.run | - | ... | - - - 将文件放置在\${working_dir}路径下 - -- 安装 - ```shell - chmod +x Ascend-cann-nnal_*_linux-*.run - ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} - source ${working_dir}/nnal/atb/set_env.sh - ``` -- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 -- 可以使用以下指令查看abi是0还是1 - ```shell - python -c "import torch; print(torch.compiled_with_cxx11_abi())" - ``` - - 若输出结果为True表示abi1,False表示abi0 - -### 1.2 安装PytorchAdapter - -先安装torch 再安装torch_npu - -#### 1.2.1 安装torch - -- 下载 - - | 包名 | - | -------------------------------------------- | - | torch-2.1.0-cp310-cp10-linux_aarch64.whl | - | ... | - - - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 - -- 安装 - ```bash - # 安装torch 2.1.0 的python 3.10 的arm版本为例 - pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl - ``` - -#### 1.2.2 安装torch_npu - -[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: - -| 包名 | -| --------------------------- | -| pytorch_v2.1.0_py38.tar.gz | -| pytorch_v2.1.0_py39.tar.gz | -| pytorch_v2.1.0_py310.tar.gz | -| ... 
| - -- 安装选择与torch版本以及python版本一致的npu_torch版本 - -```bash -# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 -tar -zxvf pytorch_v2.1.0_py310.tar.gz -pip install torch*_aarch64.whl -``` -### 1.3 安装开源软件依赖 -| 默认依赖 | [requirement.txt](./requirements.txt) | -- 开源软件依赖请使用下述命令进行安装: - ```bash - pip install -r ./requirements.txt - ``` - -### 1.4 安装模型仓 -使用编译好的包进行安装 - - 下载编译好的包 - - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) - - | 包名 | - | ------------------------------------------------------------ | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | - | ... | - - - 将文件放置在\${working_dir}路径下 - - 解压 - ```shell - cd ${working_dir} - mkdir MindIE-LLM - cd MindIE-LLM - tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz - ``` - - 安装atb_llm whl包 - ``` - cd ${working_dir}/MindIE-LLM - # 首次安装 - pip install atb_llm-0.0.1-py3-none-any.whl - # 更新 - pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall - ``` - - -## 模型推理 - -### 对话测试 -进入llm_model路径 - -```shell -cd $ATB_SPEED_HOME_PATH -``` - -执行对话测试 - -```shell -python -m examples.run_fa_edge \ - --model_path ${权重路径} \ - --input_text 'What is deep learning?' \ - --max_output_length 20 \ -``` +# DeepSeek-R1-Distill-Qwen-1.5B + +## Usage + +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. + +## 约束条件 +* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-1.5B模型 +* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 +* 由于此硬件为单卡,仅支持TP=1 + +## 权重 + +**权重下载** + +- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main) + +## 新建环境 + +### 1.1 安装CANN +- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) +- 安装顺序:先安装toolkit 再安装kernel + +#### 1.1.1 安装toolkit + +- 下载 + +| cpu | 包名(其中`${version}`为实际版本) | +| ------- | ------------------------------------------------ | +| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | + +- 安装 + ```bash + # 安装toolkit 以arm为例 + chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run + ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install + source /usr/local/Ascend/ascend-toolkit/set_env.sh + ``` + +#### 1.1.2 安装kernel + +- 下载 + +| 包名 | +| ------------------------------------------ | +| Ascend-cann-kernels*_${version}_linux.run | + + - 根据芯片型号选择对应的安装包 + +- 安装 + ```bash + chmod +x Ascend-cann-kernels-*_${version}_linux.run + ./Ascend-cann-kernels-*_${version}_linux.run --install + ``` + +#### 1.1.3 安装加速库 +- 下载加速库 + - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 + + | 包名(其中`${version}`为实际版本) | + | -------------------------------------------- | + | Ascend-cann-nnal_${version}_linux-aarch64.run | + | ... 
| + + - 将文件放置在\${working_dir}路径下 + +- 安装 + ```shell + chmod +x Ascend-cann-nnal_*_linux-*.run + ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} + source ${working_dir}/nnal/atb/set_env.sh + ``` +- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 +- 可以使用以下指令查看abi是0还是1 + ```shell + python -c "import torch; print(torch.compiled_with_cxx11_abi())" + ``` + - 若输出结果为True表示abi1,False表示abi0 + +### 1.2 安装PytorchAdapter + +先安装torch 再安装torch_npu + +#### 1.2.1 安装torch + +- 下载 + + | 包名 | + | -------------------------------------------- | + | torch-2.1.0-cp310-cp10-linux_aarch64.whl | + | ... | + + - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 + +- 安装 + ```bash + # 安装torch 2.1.0 的python 3.10 的arm版本为例 + pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl + ``` + +#### 1.2.2 安装torch_npu + +[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: + +| 包名 | +| --------------------------- | +| pytorch_v2.1.0_py38.tar.gz | +| pytorch_v2.1.0_py39.tar.gz | +| pytorch_v2.1.0_py310.tar.gz | +| ... | + +- 安装选择与torch版本以及python版本一致的npu_torch版本 + +```bash +# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 +tar -zxvf pytorch_v2.1.0_py310.tar.gz +pip install torch*_aarch64.whl +``` +### 1.3 安装开源软件依赖 +| 默认依赖 | [requirement.txt](./requirements.txt) | +- 开源软件依赖请使用下述命令进行安装: + ```bash + pip install -r ./requirements.txt + ``` + +### 1.4 安装模型仓 +使用编译好的包进行安装 + - 下载编译好的包 + - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) + + | 包名 | + | ------------------------------------------------------------ | + | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | + | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | + | ... | + + - 将文件放置在\${working_dir}路径下 + - 解压 + ```shell + cd ${working_dir} + mkdir MindIE-LLM + cd MindIE-LLM + tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz + ``` + - 安装atb_llm whl包 + ``` + cd ${working_dir}/MindIE-LLM + # 首次安装 + pip install atb_llm-0.0.1-py3-none-any.whl + # 更新 + pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall + ``` + + +## 模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +python -m examples.run_fa_edge \ + --model_path ${权重路径} \ + --input_text 'What is deep learning?' 
\ + --max_output_length 20 \ +``` diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt index b4ebc76e54..d3cf990cf3 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.3.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -decorator==5.1.1 -filelock==3.17.0 -fsspec==2025.2.0 -huggingface-hub==0.28.1 -idna==3.10 -Jinja2==3.1.5 -MarkupSafe==3.0.2 -mpmath==1.3.0 -networkx==3.4.2 -numpy==1.26.0 -packaging==24.2 -psutil==6.1.1 -PyYAML==6.0.2 -regex==2024.11.6 -requests==2.32.3 -safetensors==0.5.2 -scipy==1.15.1 -sympy==1.13.3 -tokenizers==0.20.3 -transformers==4.45.1 -typing_extensions==4.12.2 +attrs==24.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +decorator==5.1.1 +filelock==3.17.0 +fsspec==2025.2.0 +huggingface-hub==0.28.1 +idna==3.10 +Jinja2==3.1.5 +MarkupSafe==3.0.2 +mpmath==1.3.0 +networkx==3.4.2 +numpy==1.26.0 +packaging==24.2 +psutil==6.1.1 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +safetensors==0.5.2 +scipy==1.15.1 +sympy==1.13.3 +tokenizers==0.20.3 +transformers==4.45.1 +typing_extensions==4.12.2 urllib3==2.3.0 \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md similarity index 100% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md similarity index 96% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md index 629145e95c..2c489dbc58 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md @@ -1,175 +1,175 @@ -# DeepSeek-R1-Distill-Qwen-7B - -## Usage - -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them. 
-## 约束条件 -* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-7B模型 -* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 -* 由于此硬件为单卡,仅支持TP=1 - -## 权重 - -**权重下载** - -- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main) - -## 新建环境 - -### 1.1 安装CANN -- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) -- 安装顺序:先安装toolkit 再安装kernel - -#### 1.1.1 安装toolkit - -- 下载 - -| cpu | 包名(其中`${version}`为实际版本) | -| ------- | ------------------------------------------------ | -| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | - -- 安装 - ```bash - # 安装toolkit 以arm为例 - chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run - ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install - source /usr/local/Ascend/ascend-toolkit/set_env.sh - ``` - -#### 1.1.2 安装kernel - -- 下载 - -| 包名 | -| ------------------------------------------ | -| Ascend-cann-kernels*_${version}_linux.run | - - - 根据芯片型号选择对应的安装包 - -- 安装 - ```bash - chmod +x Ascend-cann-kernels-*_${version}_linux.run - ./Ascend-cann-kernels-*_${version}_linux.run --install - ``` - -#### 1.1.3 安装加速库 -- 下载加速库 - - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 - - | 包名(其中`${version}`为实际版本) | - | -------------------------------------------- | - | Ascend-cann-nnal_${version}_linux-aarch64.run | - | Ascend-cann-nnal_${version}_linux-x86_64.run | - | ... | - - - 将文件放置在\${working_dir}路径下 - -- 安装 - ```shell - chmod +x Ascend-cann-nnal_*_linux-*.run - ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} - source ${working_dir}/nnal/atb/set_env.sh - ``` -- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 -- 可以使用以下指令查看abi是0还是1 - ```shell - python -c "import torch; print(torch.compiled_with_cxx11_abi())" - ``` - - 若输出结果为True表示abi1,False表示abi0 - -### 1.2 安装PytorchAdapter - -先安装torch 再安装torch_npu - -#### 1.2.1 安装torch - -- 下载 - - | 包名 | - | -------------------------------------------- | - | torch-2.1.0+cpu-cp310-cp310-linux_x86_64.whl | - | torch-2.1.0-cp310-cp10-linux_aarch64.whl | - | ... | - - - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 - -- 安装 - ```bash - # 安装torch 2.1.0 的python 3.10 的arm版本为例 - pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl - ``` - -#### 1.2.2 安装torch_npu - -[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: - -| 包名 | -| --------------------------- | -| pytorch_v2.1.0_py38.tar.gz | -| pytorch_v2.1.0_py39.tar.gz | -| pytorch_v2.1.0_py310.tar.gz | -| ... | - -- 安装选择与torch版本以及python版本一致的npu_torch版本 - -```bash -# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 -tar -zxvf pytorch_v2.1.0_py310.tar.gz -pip install torch*_aarch64.whl -``` -### 1.3 安装开源软件依赖 -| 默认依赖 | [requirement.txt](./requirements.txt) | -- 开源软件依赖请使用下述命令进行安装: - ```bash - pip install -r ./requirements.txt - ``` - -### 1.4 安装模型仓 -使用编译好的包进行安装 - - 下载编译好的包 - - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) - - | 包名 | - | ------------------------------------------------------------ | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch1.11.0-abi1.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch2.1.0-abi1.tar.gz | - | ... 
-
-  - 将文件放置在\${working_dir}路径下
-  - 解压
-  ```shell
-  cd ${working_dir}
-  mkdir MindIE-LLM
-  cd MindIE-LLM
-  tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz
-  ```
-  - 安装atb_llm whl包
-  ```
-  cd ${working_dir}/MindIE-LLM
-  # 首次安装
-  pip install atb_llm-0.0.1-py3-none-any.whl
-  # 更新
-  pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall
-  ```
-
-
-## 纯模型推理
-
-### 对话测试
-进入llm_model路径
-
-```shell
-cd $ATB_SPEED_HOME_PATH
-```
-
-执行对话测试
-
-```shell
-python -m examples.run_fa_edge \
-    --model_path ${权重路径} \
-    --input_text 'What is deep learning?' \
-    --max_output_length 20 \
-```
-
+# DeepSeek-R1-Distill-Qwen-7B
+
+## Usage
+
+Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them.
+## 约束条件
+* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-7B模型
+* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096
+* 由于此硬件为单卡,仅支持TP=1
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main)
+
+## 新建环境
+
+### 1.1 安装CANN
+- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann)
+- 安装顺序:先安装toolkit 再安装kernel
+
+#### 1.1.1 安装toolkit
+
+- 下载
+
+| cpu | 包名(其中`${version}`为实际版本) |
+| ------- | ------------------------------------------------ |
+| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run |
+
+- 安装
+  ```bash
+  # 安装toolkit 以arm为例
+  chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run
+  ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install
+  source /usr/local/Ascend/ascend-toolkit/set_env.sh
+  ```
+
+#### 1.1.2 安装kernel
+
+- 下载
+
+| 包名 |
+| ------------------------------------------ |
+| Ascend-cann-kernels-*_${version}_linux.run |
+
+  - 根据芯片型号选择对应的安装包
+
+- 安装
+  ```bash
+  chmod +x Ascend-cann-kernels-*_${version}_linux.run
+  ./Ascend-cann-kernels-*_${version}_linux.run --install
+  ```
+
+#### 1.1.3 安装加速库
+- 下载加速库
+  - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。
+
+  | 包名(其中`${version}`为实际版本) |
+  | -------------------------------------------- |
+  | Ascend-cann-nnal_${version}_linux-aarch64.run |
+  | Ascend-cann-nnal_${version}_linux-x86_64.run |
+  | ... |
+
+  - 将文件放置在\${working_dir}路径下
+
+- 安装
+  ```shell
+  chmod +x Ascend-cann-nnal_*_linux-*.run
+  ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir}
+  source ${working_dir}/nnal/atb/set_env.sh
+  ```
+- 可以使用`uname -a`指令查看服务器是x86还是aarch架构
+- 可以使用以下指令查看abi是0还是1
+  ```shell
+  python -c "import torch; print(torch.compiled_with_cxx11_abi())"
+  ```
+  - 若输出结果为True表示abi1,False表示abi0
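+- 可结合上述两条命令快速自检(以下脚本仅供参考),以确定应下载的安装包架构与abi后缀:
+  ```shell
+  # 查看CPU架构,输出aarch64或x86_64
+  uname -m
+  # 查看abi,输出True表示abi1,False表示abi0
+  python -c "import torch; print(torch.compiled_with_cxx11_abi())"
+  ```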
+
+### 1.2 安装PytorchAdapter
+
+先安装torch 再安装torch_npu
+
+#### 1.2.1 安装torch
+
+- 下载
+
+  | 包名 |
+  | -------------------------------------------- |
+  | torch-2.1.0+cpu-cp310-cp310-linux_x86_64.whl |
+  | torch-2.1.0-cp310-cp310-linux_aarch64.whl |
+  | ... |
+
+  - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。
+
+- 安装
+  ```bash
+  # 安装torch 2.1.0 的python 3.10 的arm版本为例
+  pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl
+  ```
+
+#### 1.2.2 安装torch_npu
+
+[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法:
+
+| 包名 |
+| --------------------------- |
+| pytorch_v2.1.0_py38.tar.gz |
+| pytorch_v2.1.0_py39.tar.gz |
+| pytorch_v2.1.0_py310.tar.gz |
+| ... |
+
+- 安装选择与torch版本以及python版本一致的npu_torch版本
+
+```bash
+# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例
+tar -zxvf pytorch_v2.1.0_py310.tar.gz
+pip install torch*_aarch64.whl
+```
+### 1.3 安装开源软件依赖
+| 默认依赖 | [requirements.txt](./requirements.txt) |
+- 开源软件依赖请使用下述命令进行安装:
+  ```bash
+  pip install -r ./requirements.txt
+  ```
+
+### 1.4 安装模型仓
+使用编译好的包进行安装
+ - 下载编译好的包
+  - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann)
+
+  | 包名 |
+  | ------------------------------------------------------------ |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch1.11.0-abi1.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch2.1.0-abi1.tar.gz |
+  | ... |
+
+  - 将文件放置在\${working_dir}路径下
+  - 解压
+  ```shell
+  cd ${working_dir}
+  mkdir MindIE-LLM
+  cd MindIE-LLM
+  tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz
+  ```
+  - 安装atb_llm whl包
+  ```shell
+  cd ${working_dir}/MindIE-LLM
+  # 首次安装
+  pip install atb_llm-0.0.1-py3-none-any.whl
+  # 更新
+  pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall
+  ```
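+  - 安装完成后,可用以下命令粗略验证(仅供参考):
+  ```shell
+  # 查看atb_llm包是否安装成功,输出其版本与安装位置
+  pip show atb_llm
+  ```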
+
+
+## 纯模型推理
+
+### 对话测试
+进入llm_model路径
+
+```shell
+cd $ATB_SPEED_HOME_PATH
+```
+
+执行对话测试
+
+```shell
+python -m examples.run_fa_edge \
+    --model_path ${权重路径} \
+    --input_text 'What is deep learning?' \
+    --max_output_length 20
+```
+
diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
similarity index 94%
rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt
rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
index b4ebc76e54..d3cf990cf3 100644
--- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
@@ -1,25 +1,25 @@
-attrs==24.3.0
-certifi==2025.1.31
-charset-normalizer==3.4.1
-decorator==5.1.1
-filelock==3.17.0
-fsspec==2025.2.0
-huggingface-hub==0.28.1
-idna==3.10
-Jinja2==3.1.5
-MarkupSafe==3.0.2
-mpmath==1.3.0
-networkx==3.4.2
-numpy==1.26.0
-packaging==24.2
-psutil==6.1.1
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.3
-safetensors==0.5.2
-scipy==1.15.1
-sympy==1.13.3
-tokenizers==0.20.3
-transformers==4.45.1
-typing_extensions==4.12.2
+attrs==24.3.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+decorator==5.1.1
+filelock==3.17.0
+fsspec==2025.2.0
+huggingface-hub==0.28.1
+idna==3.10
+Jinja2==3.1.5
+MarkupSafe==3.0.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==1.26.0
+packaging==24.2
+psutil==6.1.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.5.2
+scipy==1.15.1
+sympy==1.13.3
+tokenizers==0.20.3
+transformers==4.45.1
+typing_extensions==4.12.2
 urllib3==2.3.0
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
similarity index 82%
rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md
rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index dd96e2809b..8da84957cb 100644
--- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -185,6 +185,23 @@ curl 127.0.0.1:1040/generate -d '{
 
 > 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html)
 
+## Atlas 800I A2 量化
+Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
+```bash
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+
 ## 常见问题
 1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。
 
-- Gitee

From 9fb7647e7c2a504c483f0e7543ea1b4c72a4d3d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 7 Feb 2025 09:30:24 +0800
Subject: [PATCH 11/18] fix

---
 .../DeepSeek-R1-Distill-Qwen-7B/README.md | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 8da84957cb..6aea2621c4 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -80,6 +80,23 @@ docker run -it -d --net=host --shm-size=1g \
 docker exec -it ${容器名称} bash
 ```
 
+## Atlas 800I A2 量化
+Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
+```bash
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+
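+以下给出一个带示例路径的完整调用(仅为示意,示例路径/data/DeepSeek-R1-Distill-Qwen-7B为假设,请替换为实际路径):
+```bash
+# W8A8量化示例,输出目录需为空目录或不存在的目录
+python3 quant_qwen.py --model_path /data/DeepSeek-R1-Distill-Qwen-7B --save_directory /data/DeepSeek-R1-Distill-Qwen-7B-w8a8 --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```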
 ## 纯模型推理
 
 ### 对话测试
@@ -185,23 +202,6 @@ curl 127.0.0.1:1040/generate -d '{
 
 > 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html)
 
-## Atlas 800I A2 量化
-Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
-- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
-- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
-- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
-- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
-- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
-```bash
-python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
-```
-- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
-- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
-```bash
-export ASCEND_RT_VISIBLE_DEVICES=0,1
-export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
-```
-
 ## 常见问题
 1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。
 
-- Gitee

From fd698198b3efa043dd63f30b64c9c571f80d40f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 7 Feb 2025 17:38:10 +0800
Subject: [PATCH 12/18] =?UTF-8?q?update=20deepseek-qwen-7b=20=E9=87=8F?=
 =?UTF-8?q?=E5=8C=96=E7=B1=BB=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../DeepSeek-R1-Distill-Qwen-7B/README.md | 37 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 6aea2621c4..e04eba4870 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -80,8 +80,9 @@ docker exec -it ${容器名称} bash
 ```
 
-## Atlas 800I A2 量化
-Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+## 权重量化
+### W8A8量化
+W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
 - 注意该量化方式仅支持在Atlas 800I A2服务器上运行
 - 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
 - git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
@@ -96,6 +97,38 @@ python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8
 export ASCEND_RT_VISIBLE_DEVICES=0,1
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
+### 稀疏量化
+ - Step 1
+ - 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+ - 修改模型权重config.json中`torch_dtype`字段为`float16`
+ - 下载msmodelslim量化工具
+   - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
+ - 根据msmodelslim量化工具readme进行相关操作
+ 注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
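+ 可用以下命令自检(仅供参考):
+ ```shell
+ # source对应的set_env.sh后,确认ASCEND_HOME_PATH非空即可继续安装msmodelslim
+ echo $ASCEND_HOME_PATH
+ ```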
+ ```shell
+ # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
+ jq --version
+ # 设置CANN包的环境变量
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh
+ cd ${llm_path}
+ # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+ vi examples/models/qwen/convert_quant_weight.sh
+ bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w4a8
+ ```
+
+ - Step 2:量化权重切分及压缩
+ ```shell
+ export IGNORE_INFER_ERROR=1
+ torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+ ```
+ - TP数为tensor parallel并行个数
+ - 注意:若权重生成时以TP=2进行切分,则运行时也需以TP=2运行
+ - 示例
+ ```shell
+ torchrun --nproc_per_node 2 -m examples.convert.model_slim.sparse_compressor --model_path /data1/weights/model_slim/Qwen-7b_w8a8s --save_directory /data1/weights/model_slim/Qwen-7b_w8a8sc
+ ```
+
 ## 纯模型推理
 
-- Gitee

From 547bc78e16d56421f7aa83e5edc267e5e52c6ebe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:15:15 +0800
Subject: [PATCH 13/18] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=A8=80=E7=96=8F?=
 =?UTF-8?q?=E9=87=8F=E5=8C=96=E7=9A=84=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index e04eba4870..d310ab5f67 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -104,6 +104,7 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
  - 根据msmodelslim量化工具readme进行相关操作
+ - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`
  注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
  ```shell
  # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
  jq --version
@@ -113,8 +114,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
  cd ${llm_path}
  # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
  # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
- vi examples/models/qwen/convert_quant_weight.sh
- bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w4a8
+ # 运行量化转换脚本
+ python3 quant_qwen.py --model_path {} --save_directory {} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
  ```
-- Gitee

From c7bf249343ae79ea0ec5873a7e2bbaea4231a7295c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:30:24 +0800
Subject: [PATCH 14/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index d310ab5f67..712dae6289 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -109,13 +109,9 @@
 ```shell
 # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
 jq --version
- # 设置CANN包的环境变量
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- cd ${llm_path}
- # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
- # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+
 # 运行量化转换脚本
-python3 quant_qwen.py --model_path {} --save_directory {} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
 ```
-- Gitee

From 87f8c394308bccc694bfa2782daf42a1da26bc25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:42:02 +0800
Subject: [PATCH 15/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 712dae6289..617438f4cb 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -109,7 +109,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```shell
 # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
 jq --version
-
+ # 指定当前机器上可用的逻辑NPU核心 通过修改export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ export ASCEND_RT_VISIBLE_DEVICES=0
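+ # (补充说明,仅供参考)多卡场景可设为逗号分隔的卡号列表,如0,1;Atlas 300I DUO稀疏量化请保持单卡可见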
 # 运行量化转换脚本
 python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
 ```
-- Gitee

From 9e3344d09e509ae8dab5f791423e555be6fcf617 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 14 Feb 2025 16:36:18 +0800
Subject: [PATCH 16/18] =?UTF-8?q?=E6=96=B0=E5=A2=9E1.5B=20=E9=87=8F?=
 =?UTF-8?q?=E5=8C=96=E6=96=B9=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../DeepSeek-R1-Distill-Qwen-1.5B/README.md   | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index cd61e32d6a..382f1aa591 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -87,6 +87,58 @@ docker run -it -d --net=host --shm-size=1g \
 ```shell
 docker exec -it ${容器名称} bash
 ```
+## 权重量化
+### W8A8量化
+W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+```bash
+# 设置CANN包的环境变量
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+cd ${llm_path}
+# 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+# 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+vi examples/models/qwen/convert_quant_weight.sh
+bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w8a8
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+### 稀疏量化
+ - Step 1
+ - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+ - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 修改模型权重config.json中`torch_dtype`字段为`float16`
+ - 下载msmodelslim量化工具
+   - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
+ - 根据msmodelslim量化工具readme进行相关操作
+ - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`
+ 注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
+ ```shell
+ # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
+ jq --version
+
+ # 指定当前机器上可用的逻辑NPU核心 通过修改export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ export ASCEND_RT_VISIBLE_DEVICES=0
+ python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
+ ```
+
+ - Step 2:量化权重切分及压缩
+ ```shell
+ export IGNORE_INFER_ERROR=1
+ torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+ ```
+ - TP数为tensor parallel并行个数
+ - 注意:若权重生成时以TP=2进行切分,则运行时也需以TP=2运行
+ - 示例
+ ```shell
+ torchrun --nproc_per_node 2 -m examples.convert.model_slim.sparse_compressor --model_path /data1/weights/model_slim/Qwen-1.5b_w8a8s --save_directory /data1/weights/model_slim/Qwen-1.5b_w8a8sc
+ ```
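+ - 量化完成后可快速检查输出目录(仅供参考,具体文件列表以msmodelslim实际输出为准):
+ ```shell
+ # 确认量化权重与配置文件已生成
+ ls -lh {W8A8SC量化权重路径}
+ ```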
 
 ## 纯模型推理
 
-- Gitee

From abfbdaa65773a0e0c5873a7e2bbaea4231a7295c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 14 Feb 2025 16:57:24 +0800
Subject: [PATCH 17/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index 382f1aa591..d85d9d1896 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -99,7 +99,6 @@ W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 cd ${llm_path}
 # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
-# 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
 vi examples/models/qwen/convert_quant_weight.sh
 bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w8a8
 ```
-- Gitee

From 0fb5df432b9511244f46fb1a74b9a5f5410418e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Thu, 10 Apr 2025 10:36:28 +0800
Subject: [PATCH 18/18] =?UTF-8?q?=E5=88=B7=E6=96=B0readme?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md  | 4 ++--
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index d85d9d1896..8e1a814c66 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -110,8 +110,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 ### 稀疏量化
  - Step 1
- - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
- - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 注意该量化方式仅支持在Atlas 300I DUO卡上运行
+ - Atlas 300I DUO不支持多卡量化
  - 修改模型权重config.json中`torch_dtype`字段为`float16`
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 2481f11883..cd6cbc560d 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -37,8 +37,8 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名
 | HDK | 24.1.0 |
 
 ## 约束条件
-- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插1张Atlas 300I Pro推理卡的服务器`或者`1台插1张Atlas 300V视频解析卡的服务器`
-- 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
+- 在使用Atlas 300I DUO卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
 - 支持TP=1/2/4/8推理
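+
+如需快速修改`config.json`中的"torch_dtype"字段,可参考以下命令(仅为示意,需已安装jq,也可直接手动编辑文件):
+```shell
+jq '.torch_dtype = "float16"' {权重路径}/config.json > config.json.tmp && mv config.json.tmp {权重路径}/config.json
+```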
@@ -108,8 +108,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 ### 稀疏量化
  - Step 1
- - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
- - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 注意该量化方式仅支持在Atlas 300I DUO卡上运行
+ - Atlas 300I DUO不支持多卡量化
  - 修改模型权重config.json中`torch_dtype`字段为`float16`
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
-- Gitee