From 56b5a0567e4ab8b4bf009c896cf59349c12a8902 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Wed, 5 Feb 2025 18:01:52 +0800
Subject: [PATCH 01/18] add README.md

---
 .../DeepSeek-R1-Distill-Qwen-1.5B/README.md   | 205 +++++++++++++++++
 .../DeepSeek-R1-Distill-Qwen-7B/README.md     | 206 ++++++++++++++++++
 2 files changed, 411 insertions(+)
 create mode 100644 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
 create mode 100644 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
new file mode 100644
index 0000000000..c2ae4cf0a2
--- /dev/null
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -0,0 +1,205 @@
+
+# DeepseekR1
+
+## Usage
+
+We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model.
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main)
+
+**权重转换**
+由于提供的是.safetensors权重,无需转换,可以直接使用。
+
+## 加载镜像
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配本模型的镜像包:1.0.0-800I-A2-py311-openeuler24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+
+镜像加载完成后,请使用`docker images`命令确认具体的镜像名称与标签。
+```shell
+docker load -i mindie:1.0.0-800I-A2-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+or
+```shell
+docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+
+## 约束条件
+- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台800I A2 32G服务器或1台300I DUO服务器
+- 在300I DUO服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 当前支持TP=1/2/4/8推理
+
+## 新建容器
+
+目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Qwen-1.5B模型推理脚本,无需再额外下载魔乐仓库承载的模型适配代码,直接新建容器即可。
+
+执行以下启动命令(参考):
+如果您使用的是root用户镜像(例如从Ascend Hub上取得),并且可以使用特权容器,请使用以下命令启动容器:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --privileged \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+
+如果您希望使用自行构建的普通用户镜像,并且规避容器相关权限风险,可以使用以下命令指定用户与设备:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --user mindieuser: \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    --device=/dev/davinci0 \
+    --device=/dev/davinci1 \
+    --device=/dev/davinci2 \
+    --device=/dev/davinci3 \
+    --device=/dev/davinci4 \
+    --device=/dev/davinci5 \
+    --device=/dev/davinci6 \
+    --device=/dev/davinci7 \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+> 注意,以上启动命令仅供参考,请根据需求自行修改再启动容器,尤其需要注意:
+>
+> 1. `--user`,如果您的环境中HDK是通过普通用户安装(例如默认的`HwHiAiUser`,可以通过`id HwHiAiUser`命令查看该用户组ID),请设置好对应的用户组,例如用户组1001可以使用HDK,则`--user mindieuser:1001`,镜像中默认使用的是用户组1000。如果您的HDK是由root用户安装,且指定了`--install-for-all`参数,则无需指定`--user`参数。
+>
+> 2. 设定容器名称`--name`与镜像名称,800I A2和300I DUO各自使用对应版本的镜像,例如800I A2服务器使用`mindie:1.0.0-py3.11-800I-A2-aarch64-Ubuntu22.04`。
+>
+> 3. 设定想要使用的卡号`--device`。
+>
+> 4. 
设定权重挂载的路径,`-v /path-to-weights:/path-to-weights:ro`,注意,如果使用普通用户镜像,权重路径所属应为镜像内默认的1000用户,且权限可设置为750。可使用以下命令进行修改: +> ```sh +> chown -R 1000:1000 /path-to-weights +> chmod -R 755 /path-to-weights +> ``` +> 5. **在普通用户镜像中,注意所有文件均在 `/home/mindieuser` 下,请勿直接挂载 `/home` 目录,以免宿主机上存在相同目录,将容器内文件覆盖清除。** + +## 进入容器 +```shell +docker exec -it ${容器名称} bash +``` + +## 纯模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +torchrun --nproc_per_node 2 \ + --master_port 20037 \ + -m examples.run_pa \ + --model_path {权重路径} \ + --max_output_length 20 +``` + +### 性能测试 +进入ModelTest路径 +```shell +cd $ATB_SPEED_HOME_PATH/tests/modeltest/ +``` +运行测试脚本 +```shell +bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length]) +``` +具体执行batch=1, 输入长度256, 输出长度256用例的2卡并行性能测试命令为: +```shell +bash run.sh pa_bf16 performance [[256,256]] 1 qwen ${weight_path} 2 +``` + +> 注:ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md` +## 服务化推理 + + +- 打开配置文件 + +```shell +vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json +``` + +- 更改配置文件 + +```json +{ +... +"ServerConfig" : +{ +... +"port" : 1040, #自定义 +"managementPort" : 1041, #自定义 +"metricsPort" : 1042, #自定义 +... +"httpsEnabled" : false, +... +}, + +"BackendConfig": { +... +"npuDeviceIds" : [[0,1]], +... +"ModelDeployConfig": +{ +"truncation" : false, +"ModelConfig" : [ +{ +... +"modelName" : "qwen", +"modelWeightPath" : "/data/datasets/DeepSeek-R1-Distill-Qwen-1.5B", +"worldSize" : 2, +... +} +] +}, +} +} +``` + +- 拉起服务化 + +```shell +cd /usr/local/Ascend/mindie/latest/mindie-service/bin +./mindieservice_daemon +``` + +- 新建窗口测试(VLLM接口) + +```shell +curl 127.0.0.1:1040/generate -d '{ +"prompt": "What's deep learning?", +"max_tokens": 32, +"stream": false, +"do_sample":true, +"repetition_penalty": 1.00, +"temperature": 0.01, +"top_p": 0.001, +"top_k": 1, +"model": "qwen" +}' +``` + +> 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html) + +## 常见问题 +1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。 + +```shell +pip install transformers==4.46.3 --force-reinstall +pip install numpy==1.26.4 --force-reinstall +``` \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md new file mode 100644 index 0000000000..48a577d521 --- /dev/null +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -0,0 +1,206 @@ + +# DeepseekR1 + +## Usage + +We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. 
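+
+下文「权重」一节给出了权重下载链接。若希望直接在命令行完成下载,可参考以下示意命令(假设环境中已安装 git 与 git-lfs 且可访问 huggingface.co;`/path-to-weights` 为本文档使用的示例路径,请按实际环境调整):
+
+```shell
+# 示意:从 Hugging Face 拉取 DeepSeek-R1-Distill-Qwen-7B 权重到示例目录
+git lfs install
+git clone https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B /path-to-weights/DeepSeek-R1-Distill-Qwen-7B
+```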
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main)
+
+
+**权重转换**
+由于提供的是.safetensors权重,无需转换,可以直接使用。
+
+## 加载镜像
+前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配本模型的镜像包:1.0.0-800I-A2-py311-openeuler24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts
+
+镜像加载完成后,请使用`docker images`命令确认具体的镜像名称与标签。
+```shell
+docker load -i mindie:1.0.0-800I-A2-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+or
+```shell
+docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名称与标签)
+```
+
+## 约束条件
+- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台800I A2 32G服务器或1台300I DUO服务器
+- 在300I DUO服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 当前支持TP=1/2/4/8推理
+
+## 新建容器
+
+目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Qwen-7B模型推理脚本,无需再额外下载魔乐仓库承载的模型适配代码,直接新建容器即可。
+
+执行以下启动命令(参考):
+如果您使用的是root用户镜像(例如从Ascend Hub上取得),并且可以使用特权容器,请使用以下命令启动容器:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --privileged \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+
+如果您希望使用自行构建的普通用户镜像,并且规避容器相关权限风险,可以使用以下命令指定用户与设备:
+```sh
+docker run -it -d --net=host --shm-size=1g \
+    --user mindieuser: \
+    --name \
+    --device=/dev/davinci_manager \
+    --device=/dev/hisi_hdc \
+    --device=/dev/devmm_svm \
+    --device=/dev/davinci0 \
+    --device=/dev/davinci1 \
+    --device=/dev/davinci2 \
+    --device=/dev/davinci3 \
+    --device=/dev/davinci4 \
+    --device=/dev/davinci5 \
+    --device=/dev/davinci6 \
+    --device=/dev/davinci7 \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
+    -v /usr/local/sbin:/usr/local/sbin:ro \
+    -v /path-to-weights:/path-to-weights:ro \
+    mindie:1.0.0-800I-A2-py311-openeuler24.03-lts bash
+```
+> 注意,以上启动命令仅供参考,请根据需求自行修改再启动容器,尤其需要注意:
+>
+> 1. `--user`,如果您的环境中HDK是通过普通用户安装(例如默认的`HwHiAiUser`,可以通过`id HwHiAiUser`命令查看该用户组ID),请设置好对应的用户组,例如用户组1001可以使用HDK,则`--user mindieuser:1001`,镜像中默认使用的是用户组1000。如果您的HDK是由root用户安装,且指定了`--install-for-all`参数,则无需指定`--user`参数。
+>
+> 2. 设定容器名称`--name`与镜像名称,800I A2和300I DUO各自使用对应版本的镜像,例如800I A2服务器使用`mindie:1.0.0-py3.11-800I-A2-aarch64-Ubuntu22.04`。
+>
+> 3. 设定想要使用的卡号`--device`。
+>
+> 4. 设定权重挂载的路径,`-v /path-to-weights:/path-to-weights:ro`,注意,如果使用普通用户镜像,权重路径所属应为镜像内默认的1000用户,且权限可设置为750。可使用以下命令进行修改:
+> ```sh
+> chown -R 1000:1000 /path-to-weights
+> chmod -R 750 /path-to-weights
+> ```
+> 5. 
**在普通用户镜像中,注意所有文件均在 `/home/mindieuser` 下,请勿直接挂载 `/home` 目录,以免宿主机上存在相同目录,将容器内文件覆盖清除。** + +## 进入容器 +```shell +docker exec -it ${容器名称} bash +``` + +## 纯模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +torchrun --nproc_per_node 2 \ + --master_port 20037 \ + -m examples.run_pa \ + --model_path {权重路径} \ + --max_output_length 20 +``` + +### 性能测试 +进入ModelTest路径 +```shell +cd $ATB_SPEED_HOME_PATH/tests/modeltest/ +``` +运行测试脚本 +```shell +bash run.sh pa_[data_type] performance [case_pair] [batch_size] ([prefill_batch_size]) [model_name] ([is_chat_model]) (lora [lora_data_path]) [weight_dir] ([trust_remote_code]) [chip_num] ([parallel_params]) ([max_position_embedding/max_sequence_length]) +``` +具体执行batch=1, 输入长度256, 输出长度256用例的2卡并行性能测试命令为: +```shell +bash run.sh pa_bf16 performance [[256,256]] 1 qwen ${weight_path} 2 +``` + +> 注:ModelTest为大模型的性能和精度提供测试功能。使用文档请参考`${ATB_SPEED_HOME_PATH}/tests/modeltest/README.md` +## 服务化推理 + + +- 打开配置文件 + +```shell +vim /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json +``` + +- 更改配置文件 + +```json +{ +... +"ServerConfig" : +{ +... +"port" : 1040, #自定义 +"managementPort" : 1041, #自定义 +"metricsPort" : 1042, #自定义 +... +"httpsEnabled" : false, +... +}, + +"BackendConfig": { +... +"npuDeviceIds" : [[0,1]], +... +"ModelDeployConfig": +{ +"truncation" : false, +"ModelConfig" : [ +{ +... +"modelName" : "qwen", +"modelWeightPath" : "/data/datasets/DeepSeek-R1-Distill-Qwen-7B", +"worldSize" : 2, +... +} +] +}, +} +} +``` + +- 拉起服务化 + +```shell +cd /usr/local/Ascend/mindie/latest/mindie-service/bin +./mindieservice_daemon +``` + +- 新建窗口测试(VLLM接口) + +```shell +curl 127.0.0.1:1040/generate -d '{ +"prompt": "What's deep learning?", +"max_tokens": 32, +"stream": false, +"do_sample":true, +"repetition_penalty": 1.00, +"temperature": 0.01, +"top_p": 0.001, +"top_k": 1, +"model": "qwen" +}' +``` + +> 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html) + +## 常见问题 +1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 
降低transformers版本可解决。 + +```shell +pip install transformers==4.46.3 --force-reinstall +pip install numpy==1.26.4 --force-reinstall +``` \ No newline at end of file -- Gitee From b0740f940b48fd63478f738a3880fef6c42578ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 09:38:37 +0800 Subject: [PATCH 02/18] update readme.md --- MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md | 4 ++-- MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 6 +++--- MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md index b1789e8e3e..1e9021ddee 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md @@ -11,7 +11,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa - [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/tree/main) -### 加载镜像 +## 加载镜像 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-70B的镜像包:1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 @@ -21,7 +21,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 -### 新建容器 +## 新建容器 目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Llama-70B模型推理脚本,无需再额外下载模型适配代码,直接新建容器即可。 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md index 83caa1e4b9..d10cff9003 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md @@ -11,7 +11,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa - [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/tree/main) -### 加载镜像 +## 加载镜像 前往[昇腾社区/开发资源](https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f)下载适配DeepSeek-R1-Distill-Llama-8B的镜像包:1.0.0-800I-A2-py311-openeulsr24.03-lts或1.0.0-300I-Duo-py311-openeuler24.03-lts 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 @@ -20,7 +20,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa * 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -### 新建容器 +## 新建容器 目前提供的MindIE镜像预置了DeepSeek-R1-Distill-Llama-8B模型推理脚本,无需再额外下载模型适配代码,直接新建容器即可。 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 929e43b582..462f9b4fd0 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -1,5 +1,5 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-1.5B ## Usage @@ -169,8 +169,8 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +curl 127.0.0.1:1025/generate -d '{ +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md index 
33543fcdb2..316dacd025 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -1,5 +1,5 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-7B ## Usage @@ -170,8 +170,8 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +curl 127.0.0.1:1025/generate -d '{ +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, -- Gitee From 5746765e1f6f1320bd4fa41a0c8463dec97bd11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 09:39:34 +0800 Subject: [PATCH 03/18] update readme.md --- MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 462f9b4fd0..a6cd7f73ee 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -169,7 +169,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1025/generate -d '{ +curl 127.0.0.1:1040/generate -d '{ "prompt": "What is deep learning?", "max_tokens": 32, "stream": false, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md index 316dacd025..3022fc4e13 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -170,7 +170,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin - 新建窗口测试(VLLM接口) ```shell -curl 127.0.0.1:1025/generate -d '{ +curl 127.0.0.1:1040/generate -d '{ "prompt": "What is deep learning?", "max_tokens": 32, "stream": false, -- Gitee From 136e81fdf941fc0236be90b11b23f5b7034c8d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:15:11 +0800 Subject: [PATCH 04/18] =?UTF-8?q?=E6=9B=B4=E6=96=B0readme=E7=9A=84?= =?UTF-8?q?=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- .../DeepSeek-R1-Distill-Llama-8B/README.md | 4 +++- .../DeepSeek-R1-Distill-Qwen-1.5B/README.md | 4 ++-- .../DeepSeek-R1-Distill-Qwen-14B/README.md | 10 +++++----- .../DeepSeek-R1-Distill-Qwen-32B/README.md | 8 ++++---- .../DeepSeek-R1-Distill-Qwen-7B/README.md | 4 ++-- 6 files changed, 17 insertions(+), 15 deletions(-) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-70B/README.md (98%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B/README.md (95%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B/README.md (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-14B/README.md (90%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-32B/README.md (92%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B/README.md (94%) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md similarity index 98% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 1e9021ddee..9b1979b3cf 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -19,7 +19,7 @@ Using 
the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-70B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* 支持TP=8推理 +* Atlas 800I A2 32G服务器支持TP=8推理; Atlas 300I Duo服务器支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md similarity index 95% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index d10cff9003..80e222381f 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -3,7 +3,8 @@ ## Usage -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. + + ## 权重 @@ -19,6 +20,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index a6cd7f73ee..90c4ab123e 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -3,7 +3,7 @@ ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. ## 权重 @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md similarity index 90% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index b24536f9cc..2230270135 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -1,9 +1,9 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-14B ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-14B is one of them. 
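+
+后文「约束条件」要求:在使用Atlas 300I DUO推理卡部署时,须将权重目录下`config.json`的`"torch_dtype"`字段改为`"float16"`。除手动编辑外,也可参考以下示意脚本完成修改(假设权重位于示例路径`/path-to-weights`,请按实际目录调整):
+
+```shell
+# 示意:将权重目录下 config.json 的 torch_dtype 字段改写为 float16
+python3 - <<'EOF'
+import json
+
+path = "/path-to-weights/config.json"  # 示例路径,请替换为实际权重目录
+with open(path, "r", encoding="utf-8") as f:
+    config = json.load(f)
+config["torch_dtype"] = "float16"
+with open(path, "w", encoding="utf-8") as f:
+    json.dump(config, f, indent=2, ensure_ascii=False)
+EOF
+```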
## 权重 @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=2/4/8推理; Atlas 300I Duo服务器支持TP=2/4推理 ## 新建容器 @@ -168,7 +168,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin ```shell curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md similarity index 92% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index a493ebd1bd..9dcd245611 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -1,9 +1,9 @@ -# DeepseekR1 +# DeepSeek-R1-Distill-Qwen-32B ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-32B is one of them. ## 权重 @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-32B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=4/8推理; Atlas 300I Duo服务器支持TP=4推理 ## 新建容器 @@ -168,7 +168,7 @@ cd /usr/local/Ascend/mindie/latest/mindie-service/bin ```shell curl 127.0.0.1:1040/generate -d '{ -"prompt": "What's deep learning?", +"prompt": "What is deep learning?", "max_tokens": 32, "stream": false, "do_sample":true, diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index 3022fc4e13..f2bf8db692 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -3,7 +3,7 @@ ## Usage -We do not advise you to use base language models for text generation. Instead, you can apply post-training, e.g., SFT, RLHF, continued pretraining, etc., on this model. +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them. 
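+
+在执行后文「新建容器」步骤并通过`--device`挂载NPU卡之前,可先在宿主机上确认驱动与设备节点状态(示意命令,假设驱动与固件已正确安装,输出内容随驱动版本而异):
+
+```shell
+# 示意:查看 NPU 设备状态,以及 /dev 下可供 --device 挂载的设备节点
+npu-smi info
+ls /dev/davinci*
+```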
## 权重 @@ -30,7 +30,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- 当前支持TP=1/2/4/8推理 +- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 ## 新建容器 -- Gitee From 5b9b59fa374b8489ffe5e392f938f148fbf5a397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:26:24 +0800 Subject: [PATCH 05/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 9b1979b3cf..219695881a 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -17,9 +17,9 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* Atlas 800I A2 32G服务器支持TP=8推理; Atlas 300I Duo服务器支持TP=8推理 +* 支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index 80e222381f..e7076f47eb 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -18,9 +18,9 @@ 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` * 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" -* Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +* 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 90c4ab123e..21f6f11d73 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +- 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index 2230270135..56c9452178 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md 
@@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=2/4/8推理; Atlas 300I Duo服务器支持TP=2/4推理 +- 支持TP=2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index 9dcd245611..a3225312cd 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -27,9 +27,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-32B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=4/8推理; Atlas 300I Duo服务器支持TP=4推理 +- 支持TP=4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index f2bf8db692..e58bf31d41 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -28,9 +28,9 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要1台Atlas 800I A2 32G服务器或1台Atlas 300I Duo服务器 +- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` - 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** -- Atlas 800I A2 32G服务器支持TP=1/2/4/8推理; Atlas 300I Duo服务器支持TP=1/2/4推理 +- 支持TP=1/2/4/8推理 ## 新建容器 -- Gitee From 011088a66ac95f59fa413c224e83c3acfba23ca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:33:02 +0800 Subject: [PATCH 06/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index 219695881a..d3440f50cc 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -18,7 +18,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa ## 约束条件 * 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -* 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index 21f6f11d73..f6f739fbb6 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -28,7 +28,7 @@ docker load -i 
mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index 56c9452178..c855b21687 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index a3225312cd..21bf588ccf 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index e58bf31d41..b3d515dbf4 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -29,7 +29,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ## 约束条件 - 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -- 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** +- 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 ## 新建容器 -- Gitee From 0ad1dbb76a1fd4eb81137067a8da296d1898d91f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 10:39:07 +0800 Subject: [PATCH 07/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 4 ++-- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md | 2 +- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md index d3440f50cc..c9e89f3e9c 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-70B/README.md @@ -17,7 +17,7 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned severa 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +* 
部署DeepSeek-R1-Distill-Llama-70B模型至少需要`1台Atlas 800I A2服务器`或者`1台插4张Atlas 300I DUO卡的服务器` * 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index e7076f47eb..efd4330d4a 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -18,8 +18,8 @@ 完成之后,请使用`docker images`命令确认查找具体镜像名称与标签。 ## 约束条件 -* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` -* 在Atlas 300I Duo服务器部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" +* 部署DeepSeek-R1-Distill-Llama-8B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` +* 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16" * 支持TP=1/2/4/8推理 ## 新建容器 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md index f6f739fbb6..967fe1d157 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-1.5B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md index c855b21687..3029ae6067 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-14B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-14B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=2/4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md index 21bf588ccf..fdacc9f0e1 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-32B/README.md @@ -27,7 +27,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-32B模型至少至少需要`1台Atlas 800I A2服务器`或者`1台插2张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=4/8推理 diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md index b3d515dbf4..dd96e2809b 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md @@ -28,7 +28,7 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名 ``` ## 约束条件 -- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插XX张Atlas 300I DUO卡的服务器` +- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器` - 在使用Atlas 300I 
DUO推理卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"** - 支持TP=1/2/4/8推理 -- Gitee From 3b429462150f1d733e54a6d810fa72fcfbd34636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 11:07:05 +0800 Subject: [PATCH 08/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index efd4330d4a..f2e38076d1 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -3,7 +3,7 @@ ## Usage - +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. ## 权重 -- Gitee From 7d90d6a1ad378da317dc3b12613a538dee14602e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Thu, 6 Feb 2025 11:08:15 +0800 Subject: [PATCH 09/18] fix --- MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md index f2e38076d1..2f0f50229f 100644 --- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B/README.md @@ -5,7 +5,6 @@ Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Llama-8B is one of them. 
- ## 权重 **权重下载** -- Gitee From c72f9c05ea03b3af2c87026a5b9590ffe01b6794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?= Date: Fri, 7 Feb 2025 09:14:55 +0800 Subject: [PATCH 10/18] =?UTF-8?q?=E6=96=B0=E5=A2=9Eqwen=E7=9A=84=E9=87=8F?= =?UTF-8?q?=E5=8C=96=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../README.md | 0 .../requirements.txt | 48 +-- .../README.md | 342 ++++++++--------- .../requirements.txt | 48 +-- .../DeepSeek-R1-Distill-Qwen-1.5B/README.md | 0 .../README.md | 350 +++++++++--------- .../requirements.txt | 48 +-- .../DeepSeek-R1-Distill-Qwen-7B/README.md | 17 + 8 files changed, 435 insertions(+), 418 deletions(-) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md (100%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md (96%) rename MindIE/LLM/{DeepSeek-R1-Distill-Qwen-7B-OrangePi => DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi}/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-1.5B/README.md (100%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md (96%) rename MindIE/LLM/{DeepSeek-R1-Distill-Qwen-1.5B-OrangePi => DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi}/requirements.txt (94%) rename MindIE/LLM/{ => DeepSeek}/DeepSeek-R1-Distill-Qwen-7B/README.md (82%) diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md similarity index 100% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/README.md diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt index b4ebc76e54..d3cf990cf3 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Llama-8B-OrangePi/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.3.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -decorator==5.1.1 -filelock==3.17.0 -fsspec==2025.2.0 -huggingface-hub==0.28.1 -idna==3.10 -Jinja2==3.1.5 -MarkupSafe==3.0.2 -mpmath==1.3.0 -networkx==3.4.2 -numpy==1.26.0 -packaging==24.2 -psutil==6.1.1 -PyYAML==6.0.2 -regex==2024.11.6 -requests==2.32.3 -safetensors==0.5.2 -scipy==1.15.1 -sympy==1.13.3 -tokenizers==0.20.3 -transformers==4.45.1 -typing_extensions==4.12.2 +attrs==24.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +decorator==5.1.1 +filelock==3.17.0 +fsspec==2025.2.0 +huggingface-hub==0.28.1 +idna==3.10 +Jinja2==3.1.5 +MarkupSafe==3.0.2 +mpmath==1.3.0 +networkx==3.4.2 +numpy==1.26.0 +packaging==24.2 +psutil==6.1.1 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +safetensors==0.5.2 +scipy==1.15.1 +sympy==1.13.3 +tokenizers==0.20.3 +transformers==4.45.1 +typing_extensions==4.12.2 urllib3==2.3.0 \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md similarity index 96% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md rename to 
MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md index 80e0dbeb2e..19808ec46e 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/README.md @@ -1,171 +1,171 @@ -# DeepSeek-R1-Distill-Qwen-1.5B - -## Usage - -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. - -## 约束条件 -* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-1.5B模型 -* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 -* 由于此硬件为单卡,仅支持TP=1 - -## 权重 - -**权重下载** - -- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main) - -## 新建环境 - -### 1.1 安装CANN -- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) -- 安装顺序:先安装toolkit 再安装kernel - -#### 1.1.1 安装toolkit - -- 下载 - -| cpu | 包名(其中`${version}`为实际版本) | -| ------- | ------------------------------------------------ | -| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | - -- 安装 - ```bash - # 安装toolkit 以arm为例 - chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run - ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install - source /usr/local/Ascend/ascend-toolkit/set_env.sh - ``` - -#### 1.1.2 安装kernel - -- 下载 - -| 包名 | -| ------------------------------------------ | -| Ascend-cann-kernels*_${version}_linux.run | - - - 根据芯片型号选择对应的安装包 - -- 安装 - ```bash - chmod +x Ascend-cann-kernels-*_${version}_linux.run - ./Ascend-cann-kernels-*_${version}_linux.run --install - ``` - -#### 1.1.3 安装加速库 -- 下载加速库 - - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 - - | 包名(其中`${version}`为实际版本) | - | -------------------------------------------- | - | Ascend-cann-nnal_${version}_linux-aarch64.run | - | ... | - - - 将文件放置在\${working_dir}路径下 - -- 安装 - ```shell - chmod +x Ascend-cann-nnal_*_linux-*.run - ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} - source ${working_dir}/nnal/atb/set_env.sh - ``` -- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 -- 可以使用以下指令查看abi是0还是1 - ```shell - python -c "import torch; print(torch.compiled_with_cxx11_abi())" - ``` - - 若输出结果为True表示abi1,False表示abi0 - -### 1.2 安装PytorchAdapter - -先安装torch 再安装torch_npu - -#### 1.2.1 安装torch - -- 下载 - - | 包名 | - | -------------------------------------------- | - | torch-2.1.0-cp310-cp10-linux_aarch64.whl | - | ... | - - - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 - -- 安装 - ```bash - # 安装torch 2.1.0 的python 3.10 的arm版本为例 - pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl - ``` - -#### 1.2.2 安装torch_npu - -[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: - -| 包名 | -| --------------------------- | -| pytorch_v2.1.0_py38.tar.gz | -| pytorch_v2.1.0_py39.tar.gz | -| pytorch_v2.1.0_py310.tar.gz | -| ... 
| - -- 安装选择与torch版本以及python版本一致的npu_torch版本 - -```bash -# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 -tar -zxvf pytorch_v2.1.0_py310.tar.gz -pip install torch*_aarch64.whl -``` -### 1.3 安装开源软件依赖 -| 默认依赖 | [requirement.txt](./requirements.txt) | -- 开源软件依赖请使用下述命令进行安装: - ```bash - pip install -r ./requirements.txt - ``` - -### 1.4 安装模型仓 -使用编译好的包进行安装 - - 下载编译好的包 - - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) - - | 包名 | - | ------------------------------------------------------------ | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | - | ... | - - - 将文件放置在\${working_dir}路径下 - - 解压 - ```shell - cd ${working_dir} - mkdir MindIE-LLM - cd MindIE-LLM - tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz - ``` - - 安装atb_llm whl包 - ``` - cd ${working_dir}/MindIE-LLM - # 首次安装 - pip install atb_llm-0.0.1-py3-none-any.whl - # 更新 - pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall - ``` - - -## 模型推理 - -### 对话测试 -进入llm_model路径 - -```shell -cd $ATB_SPEED_HOME_PATH -``` - -执行对话测试 - -```shell -python -m examples.run_fa_edge \ - --model_path ${权重路径} \ - --input_text 'What is deep learning?' \ - --max_output_length 20 \ -``` +# DeepSeek-R1-Distill-Qwen-1.5B + +## Usage + +Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-1.5B is one of them. + +## 约束条件 +* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-1.5B模型 +* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 +* 由于此硬件为单卡,仅支持TP=1 + +## 权重 + +**权重下载** + +- [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/tree/main) + +## 新建环境 + +### 1.1 安装CANN +- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) +- 安装顺序:先安装toolkit 再安装kernel + +#### 1.1.1 安装toolkit + +- 下载 + +| cpu | 包名(其中`${version}`为实际版本) | +| ------- | ------------------------------------------------ | +| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | + +- 安装 + ```bash + # 安装toolkit 以arm为例 + chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run + ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install + source /usr/local/Ascend/ascend-toolkit/set_env.sh + ``` + +#### 1.1.2 安装kernel + +- 下载 + +| 包名 | +| ------------------------------------------ | +| Ascend-cann-kernels*_${version}_linux.run | + + - 根据芯片型号选择对应的安装包 + +- 安装 + ```bash + chmod +x Ascend-cann-kernels-*_${version}_linux.run + ./Ascend-cann-kernels-*_${version}_linux.run --install + ``` + +#### 1.1.3 安装加速库 +- 下载加速库 + - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 + + | 包名(其中`${version}`为实际版本) | + | -------------------------------------------- | + | Ascend-cann-nnal_${version}_linux-aarch64.run | + | ... 
| + + - 将文件放置在\${working_dir}路径下 + +- 安装 + ```shell + chmod +x Ascend-cann-nnal_*_linux-*.run + ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} + source ${working_dir}/nnal/atb/set_env.sh + ``` +- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 +- 可以使用以下指令查看abi是0还是1 + ```shell + python -c "import torch; print(torch.compiled_with_cxx11_abi())" + ``` + - 若输出结果为True表示abi1,False表示abi0 + +### 1.2 安装PytorchAdapter + +先安装torch 再安装torch_npu + +#### 1.2.1 安装torch + +- 下载 + + | 包名 | + | -------------------------------------------- | + | torch-2.1.0-cp310-cp10-linux_aarch64.whl | + | ... | + + - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 + +- 安装 + ```bash + # 安装torch 2.1.0 的python 3.10 的arm版本为例 + pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl + ``` + +#### 1.2.2 安装torch_npu + +[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: + +| 包名 | +| --------------------------- | +| pytorch_v2.1.0_py38.tar.gz | +| pytorch_v2.1.0_py39.tar.gz | +| pytorch_v2.1.0_py310.tar.gz | +| ... | + +- 安装选择与torch版本以及python版本一致的npu_torch版本 + +```bash +# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 +tar -zxvf pytorch_v2.1.0_py310.tar.gz +pip install torch*_aarch64.whl +``` +### 1.3 安装开源软件依赖 +| 默认依赖 | [requirement.txt](./requirements.txt) | +- 开源软件依赖请使用下述命令进行安装: + ```bash + pip install -r ./requirements.txt + ``` + +### 1.4 安装模型仓 +使用编译好的包进行安装 + - 下载编译好的包 + - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) + + | 包名 | + | ------------------------------------------------------------ | + | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | + | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | + | ... | + + - 将文件放置在\${working_dir}路径下 + - 解压 + ```shell + cd ${working_dir} + mkdir MindIE-LLM + cd MindIE-LLM + tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz + ``` + - 安装atb_llm whl包 + ``` + cd ${working_dir}/MindIE-LLM + # 首次安装 + pip install atb_llm-0.0.1-py3-none-any.whl + # 更新 + pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall + ``` + + +## 模型推理 + +### 对话测试 +进入llm_model路径 + +```shell +cd $ATB_SPEED_HOME_PATH +``` + +执行对话测试 + +```shell +python -m examples.run_fa_edge \ + --model_path ${权重路径} \ + --input_text 'What is deep learning?' 
\ + --max_output_length 20 \ +``` diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt similarity index 94% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt index b4ebc76e54..d3cf990cf3 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.3.0 -certifi==2025.1.31 -charset-normalizer==3.4.1 -decorator==5.1.1 -filelock==3.17.0 -fsspec==2025.2.0 -huggingface-hub==0.28.1 -idna==3.10 -Jinja2==3.1.5 -MarkupSafe==3.0.2 -mpmath==1.3.0 -networkx==3.4.2 -numpy==1.26.0 -packaging==24.2 -psutil==6.1.1 -PyYAML==6.0.2 -regex==2024.11.6 -requests==2.32.3 -safetensors==0.5.2 -scipy==1.15.1 -sympy==1.13.3 -tokenizers==0.20.3 -transformers==4.45.1 -typing_extensions==4.12.2 +attrs==24.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +decorator==5.1.1 +filelock==3.17.0 +fsspec==2025.2.0 +huggingface-hub==0.28.1 +idna==3.10 +Jinja2==3.1.5 +MarkupSafe==3.0.2 +mpmath==1.3.0 +networkx==3.4.2 +numpy==1.26.0 +packaging==24.2 +psutil==6.1.1 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +safetensors==0.5.2 +scipy==1.15.1 +sympy==1.13.3 +tokenizers==0.20.3 +transformers==4.45.1 +typing_extensions==4.12.2 urllib3==2.3.0 \ No newline at end of file diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md similarity index 100% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md similarity index 96% rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md index 629145e95c..2c489dbc58 100644 --- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md +++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/README.md @@ -1,175 +1,175 @@ -# DeepSeek-R1-Distill-Qwen-7B - -## Usage - -Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them. 
-## 约束条件 -* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-7B模型 -* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096 -* 由于此硬件为单卡,仅支持TP=1 - -## 权重 - -**权重下载** - -- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main) - -## 新建环境 - -### 1.1 安装CANN -- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann) -- 安装顺序:先安装toolkit 再安装kernel - -#### 1.1.1 安装toolkit - -- 下载 - -| cpu | 包名(其中`${version}`为实际版本) | -| ------- | ------------------------------------------------ | -| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run | - -- 安装 - ```bash - # 安装toolkit 以arm为例 - chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run - ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install - source /usr/local/Ascend/ascend-toolkit/set_env.sh - ``` - -#### 1.1.2 安装kernel - -- 下载 - -| 包名 | -| ------------------------------------------ | -| Ascend-cann-kernels*_${version}_linux.run | - - - 根据芯片型号选择对应的安装包 - -- 安装 - ```bash - chmod +x Ascend-cann-kernels-*_${version}_linux.run - ./Ascend-cann-kernels-*_${version}_linux.run --install - ``` - -#### 1.1.3 安装加速库 -- 下载加速库 - - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 - - | 包名(其中`${version}`为实际版本) | - | -------------------------------------------- | - | Ascend-cann-nnal_${version}_linux-aarch64.run | - | Ascend-cann-nnal_${version}_linux-x86_64.run | - | ... | - - - 将文件放置在\${working_dir}路径下 - -- 安装 - ```shell - chmod +x Ascend-cann-nnal_*_linux-*.run - ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir} - source ${working_dir}/nnal/atb/set_env.sh - ``` -- 可以使用`uname -a`指令查看服务器是x86还是aarch架构 -- 可以使用以下指令查看abi是0还是1 - ```shell - python -c "import torch; print(torch.compiled_with_cxx11_abi())" - ``` - - 若输出结果为True表示abi1,False表示abi0 - -### 1.2 安装PytorchAdapter - -先安装torch 再安装torch_npu - -#### 1.2.1 安装torch - -- 下载 - - | 包名 | - | -------------------------------------------- | - | torch-2.1.0+cpu-cp310-cp310-linux_x86_64.whl | - | torch-2.1.0-cp310-cp10-linux_aarch64.whl | - | ... | - - - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。 - -- 安装 - ```bash - # 安装torch 2.1.0 的python 3.10 的arm版本为例 - pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl - ``` - -#### 1.2.2 安装torch_npu - -[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法: - -| 包名 | -| --------------------------- | -| pytorch_v2.1.0_py38.tar.gz | -| pytorch_v2.1.0_py39.tar.gz | -| pytorch_v2.1.0_py310.tar.gz | -| ... | - -- 安装选择与torch版本以及python版本一致的npu_torch版本 - -```bash -# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例 -tar -zxvf pytorch_v2.1.0_py310.tar.gz -pip install torch*_aarch64.whl -``` -### 1.3 安装开源软件依赖 -| 默认依赖 | [requirement.txt](./requirements.txt) | -- 开源软件依赖请使用下述命令进行安装: - ```bash - pip install -r ./requirements.txt - ``` - -### 1.4 安装模型仓 -使用编译好的包进行安装 - - 下载编译好的包 - - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann) - - | 包名 | - | ------------------------------------------------------------ | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch1.11.0-abi1.tar.gz | - | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch2.1.0-abi1.tar.gz | - | ... 
-
-  - 将文件放置在\${working_dir}路径下
-  - 解压
-  ```shell
-  cd ${working_dir}
-  mkdir MindIE-LLM
-  cd MindIE-LLM
-  tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz
-  ```
-  - 安装atb_llm whl包
-  ```
-  cd ${working_dir}/MindIE-LLM
-  # 首次安装
-  pip install atb_llm-0.0.1-py3-none-any.whl
-  # 更新
-  pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall
-  ```
-
-
-## 纯模型推理
-
-### 对话测试
-进入llm_model路径
-
-```shell
-cd $ATB_SPEED_HOME_PATH
-```
-
-执行对话测试
-
-```shell
-python -m examples.run_fa_edge \
-    --model_path ${权重路径} \
-    --input_text 'What is deep learning?' \
-    --max_output_length 20 \
-```
-
+# DeepSeek-R1-Distill-Qwen-7B
+
+## Usage
+
+Using the reasoning data generated by DeepSeek-R1, DeepSeek AI fine-tuned several dense models that are widely used in the research community, slightly changing their configs and tokenizers. DeepSeek-R1-Distill-Qwen-7B is one of them.
+## 约束条件
+* 在20t24g 香橙派aipro上部署DeepSeek-R1-Distill-Qwen-7B模型
+* 需要修改权重目录下的config.json文件,"torch_dtype"字段改为"float16", "max_position_embedding"字段改为4096
+* 由于此硬件为单卡,仅支持TP=1
+
+## 权重
+
+**权重下载**
+
+- [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B/tree/main)
+
+## 新建环境
+
+### 1.1 安装CANN
+- 详细信息可参见[昇腾社区CANN软件](https://www.hiascend.com/software/cann)
+- 安装顺序:先安装toolkit 再安装kernel
+
+#### 1.1.1 安装toolkit
+
+- 下载
+
+| cpu | 包名(其中`${version}`为实际版本) |
+| ------- | ------------------------------------------------ |
+| aarch64 | Ascend-cann-toolkit_${version}_linux-aarch64.run |
+
+- 安装
+  ```bash
+  # 安装toolkit 以arm为例
+  chmod +x Ascend-cann-toolkit_${version}_linux-aarch64.run
+  ./Ascend-cann-toolkit_${version}_linux-aarch64.run --install
+  source /usr/local/Ascend/ascend-toolkit/set_env.sh
+  ```
+
+#### 1.1.2 安装kernel
+
+- 下载
+
+| 包名 |
+| ------------------------------------------ |
+| Ascend-cann-kernels-*_${version}_linux.run |
+
+  - 根据芯片型号选择对应的安装包
+
+- 安装
+  ```bash
+  chmod +x Ascend-cann-kernels-*_${version}_linux.run
+  ./Ascend-cann-kernels-*_${version}_linux.run --install
+  ```
+
+#### 1.1.3 安装加速库
+- 下载加速库
+  - [下载链接](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/261918053?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。
+
+  | 包名(其中`${version}`为实际版本) |
+  | -------------------------------------------- |
+  | Ascend-cann-nnal_${version}_linux-aarch64.run |
+  | Ascend-cann-nnal_${version}_linux-x86_64.run |
+  | ... |
+
+  - 将文件放置在\${working_dir}路径下
+
+- 安装
+  ```shell
+  chmod +x Ascend-cann-nnal_*_linux-*.run
+  ./Ascend-cann-nnal_*_linux-*.run --install --install-path=${working_dir}
+  source ${working_dir}/nnal/atb/set_env.sh
+  ```
+- 可以使用`uname -a`指令查看服务器是x86还是aarch架构
+- 可以使用以下指令查看abi是0还是1
+  ```shell
+  python -c "import torch; print(torch.compiled_with_cxx11_abi())"
+  ```
+  - 若输出结果为True表示abi1,False表示abi0
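+- 可结合上述两条命令快速自检(以下脚本仅供参考),以确定应下载的安装包架构与abi后缀:
+  ```shell
+  # 查看CPU架构,输出aarch64或x86_64
+  uname -m
+  # 查看abi,输出True表示abi1,False表示abi0
+  python -c "import torch; print(torch.compiled_with_cxx11_abi())"
+  ```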
+
+### 1.2 安装PytorchAdapter
+
+先安装torch 再安装torch_npu
+
+#### 1.2.1 安装torch
+
+- 下载
+
+  | 包名 |
+  | -------------------------------------------- |
+  | torch-2.1.0+cpu-cp310-cp310-linux_x86_64.whl |
+  | torch-2.1.0-cp310-cp310-linux_aarch64.whl |
+  | ... |
+
+  - 根据所使用的环境中的python版本以及cpu类型,选择对应版本的torch安装包。
+
+- 安装
+  ```bash
+  # 安装torch 2.1.0 的python 3.10 的arm版本为例
+  pip install torch-2.1.0-cp310-cp310-linux_aarch64.whl
+  ```
+
+#### 1.2.2 安装torch_npu
+
+[下载PyTorch Adapter](https://www.hiascend.com/developer/download/community/result?module=pt),安装方法:
+
+| 包名 |
+| --------------------------- |
+| pytorch_v2.1.0_py38.tar.gz |
+| pytorch_v2.1.0_py39.tar.gz |
+| pytorch_v2.1.0_py310.tar.gz |
+| ... |
+
+- 安装选择与torch版本以及python版本一致的npu_torch版本
+
+```bash
+# 安装 torch_npu,以 torch 2.1.0,python 3.10 的版本为例
+tar -zxvf pytorch_v2.1.0_py310.tar.gz
+pip install torch*_aarch64.whl
+```
+### 1.3 安装开源软件依赖
+| 默认依赖 | [requirements.txt](./requirements.txt) |
+- 开源软件依赖请使用下述命令进行安装:
+  ```bash
+  pip install -r ./requirements.txt
+  ```
+
+### 1.4 安装模型仓
+使用编译好的包进行安装
+ - 下载编译好的包
+  - [下载链接](https://www.hiascend.com/developer/download/community/result?module=ie+pt+cann)
+
+  | 包名 |
+  | ------------------------------------------------------------ |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch1.11.0-abi0.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-aarch64_torch2.1.0-abi1.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch1.11.0-abi1.tar.gz |
+  | Ascend-mindie-atb-models_1.0.RC1_linux-x86_64_torch2.1.0-abi1.tar.gz |
+  | ... |
+
+  - 将文件放置在\${working_dir}路径下
+  - 解压
+  ```shell
+  cd ${working_dir}
+  mkdir MindIE-LLM
+  cd MindIE-LLM
+  tar -zxvf ../Ascend-mindie-atb-models_*_linux-*_torch*-abi*.tar.gz
+  ```
+  - 安装atb_llm whl包
+  ```shell
+  cd ${working_dir}/MindIE-LLM
+  # 首次安装
+  pip install atb_llm-0.0.1-py3-none-any.whl
+  # 更新
+  pip install atb_llm-0.0.1-py3-none-any.whl --force-reinstall
+  ```
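+  - 安装完成后,可用以下命令粗略验证(仅供参考):
+  ```shell
+  # 查看atb_llm包是否安装成功,输出其版本与安装位置
+  pip show atb_llm
+  ```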
+
+
+## 纯模型推理
+
+### 对话测试
+进入llm_model路径
+
+```shell
+cd $ATB_SPEED_HOME_PATH
+```
+
+执行对话测试
+
+```shell
+python -m examples.run_fa_edge \
+    --model_path ${权重路径} \
+    --input_text 'What is deep learning?' \
+    --max_output_length 20
+```
+
diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
similarity index 94%
rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt
rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
index b4ebc76e54..d3cf990cf3 100644
--- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-1.5B-OrangePi/requirements.txt
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B-OrangePi/requirements.txt
@@ -1,25 +1,25 @@
-attrs==24.3.0
-certifi==2025.1.31
-charset-normalizer==3.4.1
-decorator==5.1.1
-filelock==3.17.0
-fsspec==2025.2.0
-huggingface-hub==0.28.1
-idna==3.10
-Jinja2==3.1.5
-MarkupSafe==3.0.2
-mpmath==1.3.0
-networkx==3.4.2
-numpy==1.26.0
-packaging==24.2
-psutil==6.1.1
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.3
-safetensors==0.5.2
-scipy==1.15.1
-sympy==1.13.3
-tokenizers==0.20.3
-transformers==4.45.1
-typing_extensions==4.12.2
+attrs==24.3.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+decorator==5.1.1
+filelock==3.17.0
+fsspec==2025.2.0
+huggingface-hub==0.28.1
+idna==3.10
+Jinja2==3.1.5
+MarkupSafe==3.0.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==1.26.0
+packaging==24.2
+psutil==6.1.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.5.2
+scipy==1.15.1
+sympy==1.13.3
+tokenizers==0.20.3
+transformers==4.45.1
+typing_extensions==4.12.2
 urllib3==2.3.0
\ No newline at end of file
diff --git a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
similarity index 82%
rename from MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md
rename to MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index dd96e2809b..8da84957cb 100644
--- a/MindIE/LLM/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -185,6 +185,23 @@ curl 127.0.0.1:1040/generate -d '{
 
 > 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html)
 
+## Atlas 800I A2 量化
+Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
+```bash
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+
 ## 常见问题
 1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。
 
-- Gitee

From 9fb7647e7c2a504c483f0e7543ea1b4c72a4d3d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 7 Feb 2025 09:30:24 +0800
Subject: [PATCH 11/18] fix

---
 .../DeepSeek-R1-Distill-Qwen-7B/README.md | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 8da84957cb..6aea2621c4 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -80,6 +80,23 @@ docker run -it -d --net=host --shm-size=1g \
 docker exec -it ${容器名称} bash
 ```
 
+## Atlas 800I A2 量化
+Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
+```bash
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+
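+以下给出一个带示例路径的完整调用(仅为示意,示例路径/data/DeepSeek-R1-Distill-Qwen-7B为假设,请替换为实际路径):
+```bash
+# W8A8量化示例,输出目录需为空目录或不存在的目录
+python3 quant_qwen.py --model_path /data/DeepSeek-R1-Distill-Qwen-7B --save_directory /data/DeepSeek-R1-Distill-Qwen-7B-w8a8 --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
+```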
 ## 纯模型推理
 
 ### 对话测试
@@ -185,23 +202,6 @@ curl 127.0.0.1:1040/generate -d '{
 
 > 注: 服务化推理的更多信息请参考[MindIE Service用户指南](https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0001.html)
 
-## Atlas 800I A2 量化
-Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
-- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
-- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
-- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
-- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
-- 进入到msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`;并在进入的Qwen目录下,运行量化转换脚本
-```bash
-python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 8 --a_bit 8 --device_type npu
-```
-- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
-- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
-```bash
-export ASCEND_RT_VISIBLE_DEVICES=0,1
-export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
-```
-
 ## 常见问题
 1. ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils'. 降低transformers版本可解决。
 
-- Gitee

From fd698198b3efa043dd63f30b64c9c571f80d40f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 7 Feb 2025 17:38:10 +0800
Subject: [PATCH 12/18] =?UTF-8?q?update=20deepseek-qwen-7b=20=E9=87=8F?=
 =?UTF-8?q?=E5=8C=96=E7=B1=BB=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../DeepSeek-R1-Distill-Qwen-7B/README.md | 37 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 6aea2621c4..e04eba4870 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -80,8 +80,9 @@ docker exec -it ${容器名称} bash
 ```
 
-## Atlas 800I A2 量化
-Atlas 800I A2 量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+## 权重量化
+### W8A8量化
+W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
 - 注意该量化方式仅支持在Atlas 800I A2服务器上运行
 - 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
 - git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
@@ -96,6 +97,38 @@ python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8
 export ASCEND_RT_VISIBLE_DEVICES=0,1
 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
+### 稀疏量化
+ - Step 1
+ - 注意该量化方式仅支持在Atlas 300I DUO推理卡上运行
+ - 修改模型权重config.json中`torch_dtype`字段为`float16`
+ - 下载msmodelslim量化工具
+   - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
+ - 根据msmodelslim量化工具readme进行相关操作
+ 注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
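+ 可用以下命令自检(仅供参考):
+ ```shell
+ # source对应的set_env.sh后,确认ASCEND_HOME_PATH非空即可继续安装msmodelslim
+ echo $ASCEND_HOME_PATH
+ ```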
+ ```shell
+ # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
+ jq --version
+ # 设置CANN包的环境变量
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh
+ cd ${llm_path}
+ # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+ vi examples/models/qwen/convert_quant_weight.sh
+ bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w4a8
+ ```
+
+ - Step 2:量化权重切分及压缩
+ ```shell
+ export IGNORE_INFER_ERROR=1
+ torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+ ```
+ - TP数为tensor parallel并行个数
+ - 注意:若权重生成时以TP=2进行切分,则运行时也需以TP=2运行
+ - 示例
+ ```shell
+ torchrun --nproc_per_node 2 -m examples.convert.model_slim.sparse_compressor --model_path /data1/weights/model_slim/Qwen-7b_w8a8s --save_directory /data1/weights/model_slim/Qwen-7b_w8a8sc
+ ```
+
 ## 纯模型推理
 
-- Gitee

From 547bc78e16d56421f7aa83e5edc267e5e52c6ebe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:15:15 +0800
Subject: [PATCH 13/18] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=A8=80=E7=96=8F?=
 =?UTF-8?q?=E9=87=8F=E5=8C=96=E7=9A=84=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index e04eba4870..d310ab5f67 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -104,6 +104,7 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
  - 根据msmodelslim量化工具readme进行相关操作
+ - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`
  注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
  ```shell
  # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
  jq --version
@@ -113,8 +114,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
  cd ${llm_path}
  # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
  # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
- vi examples/models/qwen/convert_quant_weight.sh
- bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w4a8
+ # 运行量化转换脚本
+ python3 quant_qwen.py --model_path {} --save_directory {} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
  ```
-- Gitee

From c7bf249343ae79ea0ec5873a7e2bbaea4231a7295c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:30:24 +0800
Subject: [PATCH 14/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index d310ab5f67..712dae6289 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -109,13 +109,9 @@
 ```shell
 # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
 jq --version
- # 设置CANN包的环境变量
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- cd ${llm_path}
- # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
- # 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+
 # 运行量化转换脚本
-python3 quant_qwen.py --model_path {} --save_directory {} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
+python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
 ```
-- Gitee

From 87f8c394308bccc694bfa2782daf42a1da26bc25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Sat, 8 Feb 2025 17:42:02 +0800
Subject: [PATCH 15/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 712dae6289..617438f4cb 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -109,7 +109,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```shell
 # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
 jq --version
-
+ # 指定当前机器上可用的逻辑NPU核心 通过修改export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ export ASCEND_RT_VISIBLE_DEVICES=0
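+ # (补充说明,仅供参考)多卡场景可设为逗号分隔的卡号列表,如0,1;Atlas 300I DUO稀疏量化请保持单卡可见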
 # 运行量化转换脚本
 python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
 ```
-- Gitee

From 9e3344d09e509ae8dab5f791423e555be6fcf617 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 14 Feb 2025 16:36:18 +0800
Subject: [PATCH 16/18] =?UTF-8?q?=E6=96=B0=E5=A2=9E1.5B=20=E9=87=8F?=
 =?UTF-8?q?=E5=8C=96=E6=96=B9=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../DeepSeek-R1-Distill-Qwen-1.5B/README.md   | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index cd61e32d6a..382f1aa591 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -87,6 +87,58 @@ docker run -it -d --net=host --shm-size=1g \
 ```shell
 docker exec -it ${容器名称} bash
 ```
+## 权重量化
+### W8A8量化
+W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master/msmodelslim/example/Qwen/README.md)(昇腾压缩加速工具)实现。
+- 注意该量化方式仅支持在Atlas 800I A2服务器上运行
+- 环境配置请参考[使用说明](https://gitee.com/ascend/msit/blob/master/msmodelslim/README.md)
+- git clone下载msit仓代码; `git clone https://gitee.com/ascend/msit.git`
+- 进入到msit/msmodelslim的目录 `cd msit/msmodelslim`;并在进入的msmodelslim目录下,运行安装脚本 `bash install.sh`;
+```bash
+# 设置CANN包的环境变量
+source /usr/local/Ascend/ascend-toolkit/set_env.sh
+cd ${llm_path}
+# 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+# 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+vi examples/models/qwen/convert_quant_weight.sh
+bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w8a8
+```
+- 请将{浮点权重路径}和{W8A8量化权重路径}替换为用户实际路径。
+- 如果需要使用npu多卡量化,请先配置环境变量,支持多卡量化,建议双卡执行量化:
+```bash
+export ASCEND_RT_VISIBLE_DEVICES=0,1
+export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
+```
+### 稀疏量化
+ - Step 1
+ - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
+ - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 修改模型权重config.json中`torch_dtype`字段为`float16`
+ - 下载msmodelslim量化工具
+   - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
+ - 根据msmodelslim量化工具readme进行相关操作
+ - 进入到{msModelSlim工具路径}/msit/msmodelslim/example/Qwen的目录 `cd msit/msmodelslim/example/Qwen`
+ 注:安装完CANN后需要执行`source set_env.sh`声明ASCEND_HOME_PATH值;后续安装msmodelslim前需保证其不为空
+ ```shell
+ # 执行"jq --version"查看是否安装jq,若返回"bash:jq:command not found",则依次执行"apt-get update"和"apt install jq"
+ jq --version
+
+ # 指定当前机器上可用的逻辑NPU核心 通过修改export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
+ export ASCEND_RT_VISIBLE_DEVICES=0
+ python3 quant_qwen.py --model_path {浮点权重路径} --save_directory {W8A8S量化权重路径} --calib_file ../common/boolq.jsonl --w_bit 4 --a_bit 8 --fraction 0.011 --co_sparse True --device_type npu --use_sigma True --is_lowbit True
+ ```
+
+ - Step 2:量化权重切分及压缩
+ ```shell
+ export IGNORE_INFER_ERROR=1
+ torchrun --nproc_per_node {TP数} -m examples.convert.model_slim.sparse_compressor --model_path {W8A8S量化权重路径} --save_directory {W8A8SC量化权重路径}
+ ```
+ - TP数为tensor parallel并行个数
+ - 注意:若权重生成时以TP=2进行切分,则运行时也需以TP=2运行
+ - 示例
+ ```shell
+ torchrun --nproc_per_node 2 -m examples.convert.model_slim.sparse_compressor --model_path /data1/weights/model_slim/Qwen-1.5b_w8a8s --save_directory /data1/weights/model_slim/Qwen-1.5b_w8a8sc
+ ```
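+ - 量化完成后可快速检查输出目录(仅供参考,具体文件列表以msmodelslim实际输出为准):
+ ```shell
+ # 确认量化权重与配置文件已生成
+ ls -lh {W8A8SC量化权重路径}
+ ```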
 
 ## 纯模型推理
 
-- Gitee

From abfbdaa65773a0e0c5873a7e2bbaea4231a7295c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Fri, 14 Feb 2025 16:57:24 +0800
Subject: [PATCH 17/18] fix

---
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index 382f1aa591..d85d9d1896 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -99,7 +99,6 @@ W8A8量化权重可通过[msmodelslim](https://gitee.com/ascend/msit/blob/master
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 cd ${llm_path}
 # 指定当前机器上可用的逻辑NPU核心 通过修改convert_quant_weight.sh文件中export ASCEND_RT_VISIBLE_DEVICES值 指定使用卡号及数量
-# 7b系列使用单卡 14b 32b使用4卡 eg: ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
 vi examples/models/qwen/convert_quant_weight.sh
 bash examples/models/qwen/convert_quant_weight.sh -src {浮点权重路径} -dst {W8A8量化权重路径} -type qwen_w8a8
 ```
-- Gitee

From 0fb5df432b9511244f46fb1a74b9a5f5410418e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8F=B6=E4=B8=80=E5=B8=86?=
Date: Thu, 10 Apr 2025 10:36:28 +0800
Subject: [PATCH 18/18] =?UTF-8?q?=E5=88=B7=E6=96=B0readme?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md  | 4 ++--
 MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
index d85d9d1896..8e1a814c66 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B/README.md
@@ -110,8 +110,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 ### 稀疏量化
  - Step 1
- - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
- - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 注意该量化方式仅支持在Atlas 300I DUO卡上运行
+ - Atlas 300I DUO不支持多卡量化
  - 修改模型权重config.json中`torch_dtype`字段为`float16`
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
diff --git a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
index 2481f11883..cd6cbc560d 100644
--- a/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
+++ b/MindIE/LLM/DeepSeek/DeepSeek-R1-Distill-Qwen-7B/README.md
@@ -37,8 +37,8 @@ docker load -i mindie:1.0.0-300I-Duo-py311-openeuler24.03-lts(下载的镜像名
 | HDK | 24.1.0 |
 
 ## 约束条件
-- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`或者`1台插1张Atlas 300I Pro推理卡的服务器`或者`1台插1张Atlas 300V视频解析卡的服务器`
-- 在使用Atlas 300I DUO/Atlas 300I Pro推理卡和Atlas 300V视频解析卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
+- 部署DeepSeek-R1-Distill-Qwen-7B模型至少需要`1台Atlas 800I A2服务器`或者`1台插1张Atlas 300I DUO卡的服务器`
+- 在使用Atlas 300I DUO卡部署模型时,需要修改权重目录下的`config.json`文件,**"torch_dtype"字段改为"float16"**
 - 支持TP=1/2/4/8推理
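+
+如需快速修改`config.json`中的"torch_dtype"字段,可参考以下命令(仅为示意,需已安装jq,也可直接手动编辑文件):
+```shell
+jq '.torch_dtype = "float16"' {权重路径}/config.json > config.json.tmp && mv config.json.tmp {权重路径}/config.json
+```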
@@ -108,8 +108,8 @@ export PYTORCH_NPU_ALLOC_CONF=expandable_segments:False
 ```
 ### 稀疏量化
  - Step 1
- - 注意该量化方式仅支持在Atlas 300I DUO/Atlas 300I Pro/Atlas 300V卡上运行
- - Atlas 300I DUO/Atlas 300I Pro/Atlas 300V不支持多卡量化
+ - 注意该量化方式仅支持在Atlas 300I DUO卡上运行
+ - Atlas 300I DUO不支持多卡量化
  - 修改模型权重config.json中`torch_dtype`字段为`float16`
  - 下载msmodelslim量化工具
    - 下载地址为https://gitee.com/ascend/msit/tree/master/msmodelslim
-- Gitee