From d8451fe981391b822a798754219c51c5ab1f3f60 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Thu, 6 Feb 2025 22:07:35 +0800
Subject: [PATCH 01/12] add w8a8

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md   | 20 +++++++++++++++++++
 .../DeepSeek-R1-Distill-Llama-8B/README.md    | 19 ++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index c9e89f3e9c..c5e0bf9cc8 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -68,6 +68,26 @@ docker run -it -d --net=host --shm-size=1g \
 docker exec -it ${容器名称} bash
 ```
 
+## 量化权重生成
+* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+
+* 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
+
+* W8A8量化权重请使用以下指令生成
+    * 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+
+```shell
+# 设置CANN包的环境变量
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+# 关闭虚拟内存
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+# 进入atb-models目录
+cd ${ATB_SPEED_HOME_PATH}
+sed -i '167s/m3/m4/' examples/models/llama3/generate_quant_weight.sh
+# DeepSeek-R1-Distill-Llama-70B量化 bf16，有回退层，antioutlier使用m4算法配置，使用min-max量化方式，校准数据集使用50条BoolQ数据，在NPU上进行运算
+bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_70b_instruct_bf16_w8a8
+```
+
 ## 纯模型推理
 
 ### 对话测试
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 2f0f50229f..78910af26f 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -68,6 +68,25 @@ docker run -it -d --net=host --shm-size=1g \
 docker exec -it ${容器名称} bash
 ```
 
+## 量化权重生成
+* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+
+* 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
+
+* W8A8量化权重请使用以下指令生成
+    * 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+
+```shell
+# 设置CANN包的环境变量
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+# 关闭虚拟内存
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+# 进入atb-models目录
+cd ${ATB_SPEED_HOME_PATH}
+# DeepSeek-R1-Distill-Llama-8B量化，有回退层，antioutlier使用m1算法配置，使用min-max量化方式，校准数据集使用50条BoolQ数据，在NPU上进行运算
+bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_8b_w8a8
+```
+
 ## 纯模型推理
 
 ### 对话测试
-- 
Gitee


From b67c3d68022256fc6e0ab7f28351da96af80db13 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Thu, 6 Feb 2025 22:09:28 +0800
Subject: [PATCH 02/12] add reviewers

---
 OWNERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/OWNERS b/OWNERS
index 2989ccc6e8..b62a0bc327 100644
--- a/OWNERS
+++ b/OWNERS
@@ -83,4 +83,5 @@ reviewers:
 - guo-yucheng
 - demo-v
 - manyny
-- bangbangchui
\ No newline at end of file
+- bangbangchui
+- ZeroFadeAway
\ No newline at end of file
-- 
Gitee


From 0659341ed7876146cb541a2535f4de50a473ca2f Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Fri, 7 Feb 2025 19:10:53 +0800
Subject: [PATCH 03/12] add w8a8sc

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md   |  6 ++--
 .../DeepSeek-R1-Distill-Llama-8B/README.md    | 31 +++++++++++++++++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index c5e0bf9cc8..8385382350 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -117,16 +117,16 @@ cd $ATB_SPEED_HOME_PATH/tests/modeltest/
 ```shell
 bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length])
 ```
-具体执行batch=1, 输入长度256, 输出长度256用例的4卡并行性能测试命令如下，
+具体执行batch=1, 输入长度256, 输出长度256用例的8卡并行性能测试命令如下：
 
 Atlas 800I A2:
 ```shell
-bash run.sh pa_bf16 performance [[256,256]] 1 llama ${weight_path} 4
+bash run.sh pa_bf16 performance [[256,256]] 1 llama ${weight_path} 8
 ```
 
 Atlas 300I Duo: 
 ```shell
-bash run.sh pa_fp16 performance [[256,256]] 1 llama ${weight_path} 4
+bash run.sh pa_fp16 performance [[256,256]] 1 llama ${weight_path} 8
 ```
 
 > 注：ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md`
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 78910af26f..c9b8e7224a 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -69,6 +69,7 @@ docker exec -it ${容器名称} bash
 ```
 
 ## 量化权重生成
+### Atlas 800I A2 w8a8量化
 * 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 
 * 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
@@ -87,6 +88,36 @@ cd ${ATB_SPEED_HOME_PATH}
 bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_8b_w8a8
 ```
 
+### Atlas 300I DUO 稀疏量化
+**Step 1 生成W8A8S量化权重**
+- 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+- 修改模型权重config.json中`torch_dtype`字段为`float16`
+- 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+- 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
+```shell
+# 执行"jq --version"查看是否安装jq，若返回"bash：jq：command not found"，则依次执行"apt-get update"和"apt install jq"
+jq --version
+```
+```shell
+# 运行量化转换脚本
+python3 quant_llama.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True
+```
+
+**Step 2 量化权重切分及压缩**
+```shell
+export IGNORE_INFER_ERROR=1
+# 进入atb-models目录
+cd ${ATB_SPEED_HOME_PATH}
+# 运行切分及压缩脚本
+torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+```
+- TP数为tensor parallel并行个数
+- 注意：若权重生成时以TP=2进行切分，则运行时也需以TP=2运行
+- 示例
+```shell
+torchrun --nproc_per_node 2 -m examples.convert.model_slim.sparse_compressor --model_path /data1/weights/model_slim/Llama-8b_w8a8s --save_directory /data1/weights/model_slim/Llama-8b_w8a8sc
+```
+
 ## 纯模型推理
 
 ### 对话测试
-- 
Gitee


From b2473f6cf42dab9803b6915f28c9f7daccc9bbcb Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Sat, 8 Feb 2025 21:32:01 +0800
Subject: [PATCH 04/12] Update README

---
 .../DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md  | 11 ++++++++++-
 .../DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md   | 11 ++++++++++-
 .../DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md  |  9 +++++++++
 .../DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md   |  9 +++++++++
 .../DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md   |  9 +++++++++
 .../DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md    |  9 +++++++++
 6 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index 8385382350..d5e7d9b95d 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -14,7 +14,16 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 ## 加载镜像
 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
 
-完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。 
+完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。
+
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
 
 ## 约束条件
 * 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插4张Atlas 300I DUO卡的服务器`
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index c9b8e7224a..4be817e213 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -14,7 +14,16 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 ## 加载镜像
 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-8B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
 
-完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。 
+完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。
+
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
 
 ## 约束条件
 * 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index 967fe1d157..cd61e32d6a 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -26,6 +26,15 @@ or
 docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
 ```
 
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
+
 ## 约束条件
 - 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
 - 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
index 320768c89b..68d21aabee 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
@@ -26,6 +26,15 @@ or
 docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
 ```
 
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
+
 ## 约束条件
 - 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
 - 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
index 8f685a5f12..d9c1529ca2 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
@@ -26,6 +26,15 @@ or
 docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
 ```
 
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
+
 ## 约束条件
 - 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插2张Atlas 300I DUO卡的服务器`
 - 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 617438f4cb..aace367a46 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -27,6 +27,15 @@ or
 docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
 ```
 
+镜像中各组件版本配套如下：
+| 组件 | 版本 |
+| - | - |
+| MindIE | 1.0.0 |
+| CANN | 8.0.0 |
+| PTA | 6.0.0 |
+| MindStudio | 7.0.0 |
+| HDK | 24.1.0 |
+
 ## 约束条件
 - 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
 - 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
-- 
Gitee


From 8690543ea8c57557f1b3a60d568085bd05092bfb Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Tue, 11 Feb 2025 22:44:58 +0800
Subject: [PATCH 05/12] adapt Atlas 300V/Atlas 300I Pro

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md   |  2 +-
 .../DeepSeek-R1-Distill-Llama-8B/README.md    | 11 ++++----
 .../DeepSeek-R1-Distill-Qwen-14B/README.md    | 26 ++++++++++++-------
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index d5e7d9b95d..642a2ecf3b 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -114,7 +114,7 @@ torchrun --nproc_per_node 2 \
          -m examples.run_pa \
          --model_path ${权重路径} \
          --input_texts 'What is deep learning?' \
-         --max_output_length 20 \
+         --max_output_length 20
 ```
 
 ### 性能测试
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 4be817e213..cd08654432 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -26,8 +26,8 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 | HDK | 24.1.0 |
 
 ## 约束条件
-* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
-* 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的config.json文件，"torch_dtype"字段改为"float16"
+* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插1张Atlas 300I Pro推理卡的服务器`或者`1台插1张Atlas 300V视频解析卡的服务器`
+* 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时，需要修改权重目录下的config.json文件，"torch_dtype"字段改为"float16"
 * 支持TP=1/2/4/8推理
 
 ## 新建容器
@@ -97,9 +97,10 @@ cd ${ATB_SPEED_HOME_PATH}
 bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_8b_w8a8
 ```
 
-### Atlas 300I DUO 稀疏量化
+### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
 **Step 1 生成W8A8S量化权重**
-- 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+- 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+- Atlas 300I DUO/Atlas 300I Pro/Atlas 300V 不支持多卡量化
 - 修改模型权重config.json中`torch_dtype`字段为`float16`
 - 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
@@ -144,7 +145,7 @@ torchrun --nproc_per_node 2 \
          -m examples.run_pa \
          --model_path ${权重路径} \
          --input_texts 'What is deep learning?' \
-         --max_output_length 20 \
+         --max_output_length 20
 ```
 
 ### 性能测试
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
index 68d21aabee..93fb919639 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
@@ -36,8 +36,8 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名
 | HDK | 24.1.0 |
 
 ## 约束条件
-- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
-- 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
+- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插2张Atlas 300I Pro推理卡的服务器`或者`1台插2张Atlas 300V视频解析卡的服务器`
+- 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
 - 支持TP=2/4/8推理
 
 ## 新建容器
@@ -104,9 +104,10 @@ export ASCEND_RT_VISIBLE_DEVICES=0,1
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 
-### 300I DUO 稀疏量化
+### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
   - Step 1
-    - 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+    - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
     - git clone下载msit仓代码； `git clone https://gitee.com/ascend/msit.git`
@@ -118,11 +119,18 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
     - 上一步编译操作会得到bulid文件夹，给build文件夹相关权限 chmod -R 550 build
     - 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`；并在进入的Qwen目录下，运行量化转换脚本
     注： 安装完cann后 需要执行source set_env.sh 声明ASCEND_HOME_PATH值 后续安装msmodelslim前需保证其不为空
-    ```bash
-    export ASCEND_RT_VISIBLE_DEVICES=0
-    export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
-    python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/cn_en.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True --sigma_factor 4.0 --anti_method m4
-    ```
+    
+    **Atlas 300I DUO**使用以下方式生成W8A8S量化权重
+      ```bash
+      export ASCEND_RT_VISIBLE_DEVICES=0
+      export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+      python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/cn_en.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True --sigma_factor 4.0 --anti_method m4
+      ```
+    **Atlas 300I Pro/Atlas 300V**使用以下方式生成W8A8S量化权重
+      ```bash
+      python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/cn_en.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type cpu --use_sigma True --is_lowbit True --sigma_factor 4.0 --anti_method m4
+      ```
+    > Atlas 300I Pro/Atlas 300V量化过程耗时较长，预计5小时左右，可以在Atlas 300I DUO上先生成W8A8S量化权重，再搬运到Atlas 300I Pro/Atlas 300V执行后续步骤。
 
   - Step 2：量化权重切分及压缩
     ```shell
-- 
Gitee


From 27cffdb5b97fdd583bfc150d12f7b4ab56444724 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Tue, 11 Feb 2025 22:48:57 +0800
Subject: [PATCH 06/12] update

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index cd08654432..27fae12663 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -100,7 +100,7 @@ bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -
 ### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
 **Step 1 生成W8A8S量化权重**
 - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-- Atlas 300I DUO/Atlas 300I Pro/Atlas 300V 不支持多卡量化
+- Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
 - 修改模型权重config.json中`torch_dtype`字段为`float16`
 - 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
-- 
Gitee


From 0f12549e4b70d55e8ff78b09045ca94c09c9c5d3 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Tue, 11 Feb 2025 23:01:31 +0800
Subject: [PATCH 07/12] add qwen-7B

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index aace367a46..2481f11883 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -37,8 +37,8 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名
 | HDK | 24.1.0 |
 
 ## 约束条件
-- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
-- 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
+- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插1张Atlas 300I Pro推理卡的服务器`或者`1台插1张Atlas 300V视频解析卡的服务器`
+- 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时，需要修改权重目录下的`config.json`文件，**"torch_dtype"字段改为"float16"**
 - 支持TP=1/2/4/8推理
 
 ## 新建容器
@@ -108,7 +108,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 ### 稀疏量化
   - Step 1
-    - 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+    - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 下载msmodelslim量化工具
     - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
-- 
Gitee


From 0180a1de3bed075f275d252ba7a3330b45f876d5 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Fri, 14 Feb 2025 17:54:45 +0800
Subject: [PATCH 08/12] add llama-70b w8a8sc

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md   | 34 +++++++++++++++++++
 .../DeepSeek-R1-Distill-Llama-8B/README.md    |  9 ++---
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index 642a2ecf3b..a7b17630cc 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -78,6 +78,7 @@ docker exec -it ${容器名称} bash
 ```
 
 ## 量化权重生成
+### Atlas 800I A2 w8a8量化
 * 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 
 * 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
@@ -97,6 +98,39 @@ sed -i '167s/m3/m4/' examples/models/llama3/generate_quant_weight.sh
 bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_70b_instruct_bf16_w8a8
 ```
 
+### Atlas 300I DUO稀疏量化
+**Step 1 生成W8A8S量化权重**
+- 注意该量化方式生成的量化权重仅支持在Atlas 300I DUO卡上运行
+- DeepSeek-R1-Distill-Llama-70B量化过程资源占用较大，Step1建议在Atlas 800IA2-64G上生成
+- 修改模型权重config.json中`torch_dtype`字段为`float16`
+- 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+```shell
+export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+# 运行量化转换脚本
+cd ${ATB_SPEED_HOME_PATH}/examples/models/llama3
+python convert_quant_weights.py --w_bit 4 --a_bit 8  --is_lowbit True --device_type npu --act_method 1 --model_path {浮点权重路径}  --save_directory {W8A8S量化权重路径}  --calib_file ./cable_file.jsonl --anti_method m4 --use_sigma True
+```
+
+**Step 2 量化权重切分及压缩**
+- 该步骤需要在Atlas 300I DUO卡上运行
+```shell
+# 执行"jq --version"查看是否安装jq，若返回"bash：jq：command not found"，则依次执行"apt-get update"和"apt install jq"
+jq --version
+```
+```shell
+export IGNORE_INFER_ERROR=1
+# 进入atb-models目录
+cd ${ATB_SPEED_HOME_PATH}
+# 运行切分及压缩脚本
+torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --multiprocess_num 4 --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+```
+- TP数为tensor parallel并行个数
+- 注意：若权重生成时以TP=8进行切分，则运行时也需以TP=8运行
+- 示例
+```shell
+torchrun --nproc_per_node 8 -m examples.convert.model_slim.sparse_compressor --multiprocess_num 4 --model_path /data1/weights/model_slim/Llama-70b_w8a8s --save_directory /data1/weights/model_slim/Llama-70b_w8a8sc
+```
+
 ## 纯模型推理
 
 ### 对话测试
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 27fae12663..bfdcf9f745 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -105,15 +105,16 @@ bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -
 - 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
 ```shell
-# 执行"jq --version"查看是否安装jq，若返回"bash：jq：command not found"，则依次执行"apt-get update"和"apt install jq"
-jq --version
-```
-```shell
 # 运行量化转换脚本
 python3 quant_llama.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True
 ```
 
 **Step 2 量化权重切分及压缩**
+- 该步骤需要在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V上运行
+```shell
+# 执行"jq --version"查看是否安装jq，若返回"bash: jq: command not found"，则依次执行"apt-get update"和"apt-get install jq"
+jq --version
+```
 ```shell
 export IGNORE_INFER_ERROR=1
 # 进入atb-models目录
-- 
Gitee


From 3a7742221d69df0287593d432cca6f3d0782972f Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Thu, 20 Feb 2025 09:43:57 +0800
Subject: [PATCH 09/12] remove Llama-70B DUO info

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md   | 47 ++-----------------
 1 file changed, 4 insertions(+), 43 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index a7b17630cc..b172d61c78 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -12,7 +12,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 - [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/tree/main)
 
 ## 加载镜像
-前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts
 
 完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。
 
@@ -26,8 +26,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 | HDK | 24.1.0 |
 
 ## 约束条件
-* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插4张Atlas 300I DUO卡的服务器`
-* 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的config.json文件，"torch_dtype"字段改为"float16"
+* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`
 * 支持TP=8推理
 
 ## 新建容器
@@ -98,39 +97,6 @@ sed -i '167s/m3/m4/' examples/models/llama3/generate_quant_weight.sh
 bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_70b_instruct_bf16_w8a8
 ```
 
-### Atlas 300I DUO稀疏量化
-**Step 1 生成W8A8S量化权重**
-- 注意该量化方式生成的量化权重仅支持在Atlas 300I DUO卡上运行
-- DeepSeek-R1-Distill-Llama-70B量化过程资源占用较大，Step1建议在Atlas 800IA2-64G上生成
-- 修改模型权重config.json中`torch_dtype`字段为`float16`
-- 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
-```shell
-export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-# 运行量化转换脚本
-cd ${ATB_SPEED_HOME_PATH}/examples/models/llama3
-python convert_quant_weights.py --w_bit 4 --a_bit 8  --is_lowbit True --device_type npu --act_method 1 --model_path {浮点权重路径}  --save_directory {W8A8S量化权重路径}  --calib_file ./cable_file.jsonl --anti_method m4 --use_sigma True
-```
-
-**Step 2 量化权重切分及压缩**
-- 该步骤需要在Atlas 300I DUO卡上运行
-```shell
-# 执行"jq --version"查看是否安装jq，若返回"bash：jq：command not found"，则依次执行"apt-get update"和"apt install jq"
-jq --version
-```
-```shell
-export IGNORE_INFER_ERROR=1
-# 进入atb-models目录
-cd ${ATB_SPEED_HOME_PATH}
-# 运行切分及压缩脚本
-torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --multiprocess_num 4 --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
-```
-- TP数为tensor parallel并行个数
-- 注意：若权重生成时以TP=8进行切分，则运行时也需以TP=8运行
-- 示例
-```shell
-torchrun --nproc_per_node 8 -m examples.convert.model_slim.sparse_compressor --multiprocess_num 4 --model_path /data1/weights/model_slim/Llama-70b_w8a8s --save_directory /data1/weights/model_slim/Llama-70b_w8a8sc
-```
-
 ## 纯模型推理
 
 ### 对话测试
@@ -167,11 +133,6 @@ Atlas 800I A2:
 bash run.sh pa_bf16 performance [[256,256]] 1 llama ${weight_path} 8
 ```
 
-Atlas 300I Duo: 
-```shell
-bash run.sh pa_fp16 performance [[256,256]] 1 llama ${weight_path} 8
-```
-
 > 注：ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md`
 ## 服务化推理
 
@@ -200,7 +161,7 @@ vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json
 
 "BackendConfig": {
 ...
-"npuDeviceIds" : [[0,1,2,3]],
+"npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
 ...
 "ModelDeployConfig":
 {
@@ -209,7 +170,7 @@ vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json
 ...
 "modelName" : "llama",
 "modelWeightPath" : "/data/datasets/DeepSeek-R1-Distill-Llama-70B",
-"worldSize" : 4,
+"worldSize" : 8,
 ...
 }
 ]
-- 
Gitee


From 12c1dfcdaf0f59309f270932a5f9233fd8fe3c18 Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Tue, 25 Feb 2025 20:01:47 +0800
Subject: [PATCH 10/12] UPDATE README

---
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md  | 8 ++++++--
 .../DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md       | 6 +++++-
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md   | 8 ++++++--
 .../DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md      | 6 +++++-
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md  | 6 +++++-
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md   | 6 +++++-
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md   | 6 +++++-
 .../DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md        | 3 +++
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 6 +++++-
 MindIE/LLM/Qwen2.5/Qwen2.5-0.5B-Instruct/README.md        | 4 ++++
 MindIE/LLM/Qwen2.5/Qwen2.5-1.5B-Instruct/README.md        | 4 ++++
 MindIE/LLM/Qwen2.5/Qwen2.5-14B-Instruct/README.md         | 4 ++++
 MindIE/LLM/Qwen2.5/Qwen2.5-32B-Instruct/README.md         | 4 ++++
 MindIE/LLM/Qwen2.5/Qwen2.5-72B-Instruct/README.md         | 4 ++++
 MindIE/LLM/Qwen2.5/Qwen2.5-7B-Instruct/README.md          | 4 ++++
 15 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index b172d61c78..6d0a64e5df 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -12,7 +12,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 - [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/tree/main)
 
 ## 加载镜像
-前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包：1.0.0-800I-A2-py311-openeuler24.03-lts
 
 完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。
 
@@ -211,4 +211,8 @@ curl 127.0.0.1:1025/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md
index fa14bad942..a67174c44d 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md
@@ -210,4 +210,8 @@ python   -m examples.run_fa_edge \
          --model_path ${权重路径} \
          --input_text 'What is deep learning?' \
          --max_output_length 20 \
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index bfdcf9f745..0c8f65a4cf 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -12,7 +12,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 - [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/tree/main)
 
 ## 加载镜像
-前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-8B的镜像包：1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-8B的镜像包：1.0.0-800I-A2-py311-openeuler24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
 
 完成之后，请使用`docker images`命令确认查找具体镜像名称与标签。
 
@@ -248,4 +248,8 @@ curl 127.0.0.1:1025/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md
index de9e30abf2..c1d8770747 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md
@@ -228,4 +228,8 @@ python   -m examples.run_fa_edge \
          --model_path ${权重路径} \
          --input_text 'What is deep learning?' \
          --max_output_length 20 \
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index d85d9d1896..31e5400912 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -250,4 +250,8 @@ curl 127.0.0.1:1040/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
index 6690ce61b8..251f11f402 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
@@ -259,4 +259,8 @@ curl 127.0.0.1:1040/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
index bb463b4543..4c949a1da7 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
@@ -260,4 +260,8 @@ curl 127.0.0.1:1040/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md
index f40568eac0..f22d8534dd 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md
@@ -220,3 +220,6 @@ python   -m examples.run_fa_edge \
          --max_output_length 20 \
 ```
 
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 2481f11883..a485aeb356 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -249,4 +249,8 @@ curl 127.0.0.1:1040/generate -d '{
 ```shell
 pip install transformers==4.46.3 --force-reinstall
 pip install numpy==1.26.4 --force-reinstall
-```
\ No newline at end of file
+```
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-0.5B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-0.5B-Instruct/README.md
index 18a69892d3..6ce15aa60c 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-0.5B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-0.5B-Instruct/README.md
@@ -131,3 +131,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-0.5B-Instruct权重路径} 2
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-1.5B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-1.5B-Instruct/README.md
index e797fcfe05..f912b5d892 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-1.5B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-1.5B-Instruct/README.md
@@ -131,3 +131,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-1.5B-Instruct权重路径} 2
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-14B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-14B-Instruct/README.md
index 2600d2b1f0..404c5be82e 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-14B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-14B-Instruct/README.md
@@ -183,3 +183,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-14B-Instruct权重路径} 2
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-32B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-32B-Instruct/README.md
index 690a5e7409..554b98b33f 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-32B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-32B-Instruct/README.md
@@ -154,3 +154,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-32B-Instruct权重路径} 8
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-72B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-72B-Instruct/README.md
index 25f31ae9bf..d177bb02ff 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-72B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-72B-Instruct/README.md
@@ -144,3 +144,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-72B-Instruct权重路径} 8
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
diff --git a/MindIE/LLM/Qwen2.5/Qwen2.5-7B-Instruct/README.md b/MindIE/LLM/Qwen2.5/Qwen2.5-7B-Instruct/README.md
index 55bd0ce177..64b9dbe8c3 100644
--- a/MindIE/LLM/Qwen2.5/Qwen2.5-7B-Instruct/README.md
+++ b/MindIE/LLM/Qwen2.5/Qwen2.5-7B-Instruct/README.md
@@ -183,3 +183,7 @@ bash run.sh pa_bf16 full_BoolQ 1 qwen ${Qwen2.5-7B-Instruct权重路径} 2
 
 - 对话测试实际执行的 Python 文件为`${llm_path}/examples/run_pa.py`
 - Qwen2.5系列模型当前800I A2采用bf16， 300I DUO使用fp16 
+
+## 声明
+- 本代码仓提到的数据集和模型仅作为示例，这些数据集和模型仅供您用于非商业目的，如您使用这些数据集和模型来完成示例，请您特别注意应遵守对应数据集和模型的License，如您因使用数据集或模型而产生侵权纠纷，华为不承担任何责任。
+- 如您在使用本代码仓的过程中，发现任何问题（包括但不限于功能问题、合规问题），请在本代码仓提交issue，我们将及时审视并解答。
\ No newline at end of file
-- 
Gitee


From 7081f525d40d9474af9e6f741df24d564347130f Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Fri, 28 Feb 2025 18:45:28 +0800
Subject: [PATCH 11/12] remove 310p quant info

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md  | 1 -
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 1 -
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md  | 1 -
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md  | 1 -
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md   | 1 -
 5 files changed, 5 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 0c8f65a4cf..16e5e563cf 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -100,7 +100,6 @@ bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -
 ### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
 **Step 1 生成W8A8S量化权重**
 - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-- Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
 - 修改模型权重config.json中`torch_dtype`字段为`float16`
 - 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
 - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index 31e5400912..250b34ee0c 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -111,7 +111,6 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ### 稀疏量化
   - Step 1
     - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 下载msmodelslim量化工具
     - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
index 251f11f402..ef5f3c12db 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md
@@ -107,7 +107,6 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
   - Step 1
     - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
     - git clone下载msit仓代码； `git clone https://gitee.com/ascend/msit.git`
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
index 4c949a1da7..e2c70fcc0d 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md
@@ -107,7 +107,6 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
   - Step 1
     - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
     - git clone下载msit仓代码； `git clone https://gitee.com/ascend/msit.git`
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index a485aeb356..f64da51eb3 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -109,7 +109,6 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ### 稀疏量化
   - Step 1
     - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
-    - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
     - 修改模型权重config.json中`torch_dtype`字段为`float16`
     - 下载msmodelslim量化工具
     - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
-- 
Gitee


From 268b813e8e72e09d70929bbd64dfa22e0654ad7d Mon Sep 17 00:00:00 2001
From: ShiyaNiu <1025125896@qq.com>
Date: Thu, 3 Apr 2025 16:18:15 +0800
Subject: [PATCH 12/12] update Llama-8B/70B README

---
 .../DeepSeek-R1-Distill-Llama-70B/README.md       | 15 +++++----------
 .../DeepSeek-R1-Distill-Llama-8B/README.md        | 14 +++++++-------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
index 6d0a64e5df..c354fa2307 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md
@@ -78,23 +78,18 @@ docker exec -it ${容器名称} bash
 
 ## 量化权重生成
 ### Atlas 800I A2 w8a8量化
-* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
-
-* 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
+* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md#msmodelslim%E5%AE%89%E8%A3%85%E6%96%B9%E5%BC%8F)
+* 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
 
 * W8A8量化权重请使用以下指令生成
     * 注意该量化方式仅支持在Atlas 800I A2服务器上运行
-
 ```shell
 # 设置CANN包的环境变量
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 # 关闭虚拟内存
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
-# 进入atb-models目录
-cd ${ATB_SPEED_HOME_PATH}
-sed -i '167s/m3/m4/' examples/models/llama3/generate_quant_weight.sh
-# DeepSeek-R1-Distill-Llama-70B量化 bf16，有回退层，antioutlier使用m4算法配置，使用min-max量化方式，校准数据集使用50条BoolQ数据，在NPU上进行运算
-bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_70b_instruct_bf16_w8a8
+# 运行量化转换脚本
+python3 quant_llama.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --device_type npu --disable_level L5 --anti_method m4 --act_method 3
 ```
 
 ## 纯模型推理
@@ -109,7 +104,7 @@ cd $ATB_SPEED_HOME_PATH
 执行对话测试
 
 ```shell
-torchrun --nproc_per_node 2 \
+torchrun --nproc_per_node 8 \
          --master_port 20037 \
          -m examples.run_pa \
          --model_path ${权重路径} \
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
index 16e5e563cf..0ec989ff22 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md
@@ -26,8 +26,8 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa
 | HDK | 24.1.0 |
 
 ## 约束条件
-* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插1张Atlas 300I Pro推理卡的服务器`或者`1台插1张Atlas 300V视频解析卡的服务器`
-* 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时，需要修改权重目录下的config.json文件，"torch_dtype"字段改为"float16"
+* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
+* 在使用Atlas 300I DUO推理卡部署模型时，需要修改权重目录下的config.json文件，"torch_dtype"字段改为"float16"
 * 支持TP=1/2/4/8推理
 
 ## 新建容器
@@ -79,7 +79,7 @@ docker exec -it ${容器名称} bash
 
 ## 量化权重生成
 ### Atlas 800I A2 w8a8量化
-* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+* 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md#msmodelslim%E5%AE%89%E8%A3%85%E6%96%B9%E5%BC%8F)
 
 * 量化权重统一使用${ATB_SPEED_HOME_PATH}/examples/convert/model_slim/quantifier.py脚本生成，以下提供Llama模型量化权重生成快速启动命令
 
@@ -97,11 +97,11 @@ cd ${ATB_SPEED_HOME_PATH}
 bash examples/models/llama3/generate_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type llama3.1_8b_w8a8
 ```
 
-### Atlas 300I DUO/Atlas 300I Pro/Atlas 300V稀疏量化
+### Atlas 300I DUO稀疏量化
 **Step 1 生成W8A8S量化权重**
-- 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+- 注意该量化方式仅支持在Atlas 300I DUO卡上运行
 - 修改模型权重config.json中`torch_dtype`字段为`float16`
-- 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/tree/dev/msmodelslim)
+- 生成量化权重依赖msModelSlim工具，安装方式见[此README](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md#msmodelslim%E5%AE%89%E8%A3%85%E6%96%B9%E5%BC%8F)
 - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Llama的目录 `cd msit/msmodelslim/example/Llama`；
 ```shell
 # 运行量化转换脚本
@@ -109,7 +109,7 @@ python3 quant_llama.py --model_path {浮点权重路径} --save_directory {W8A8S
 ```
 
 **Step 2 量化权重切分及压缩**
-- 该步骤需要在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V上运行
+- 该步骤需要在Atlas 300I DUO卡上运行
 ```shell
 # 执行"jq --version"查看是否安装jq，若返回"bash：jq：command not found"，则依次执行"apt-get update"和"apt install jq"
 jq --version
-- 
Gitee