From 84dc881edf6e28a76562f5aa62309e5cd9178029 Mon Sep 17 00:00:00 2001
From: zhouwenxue <zhouwenxue2@huawei.com>
Date: Wed, 12 Feb 2025 11:20:36 +0800
Subject: [PATCH] =?UTF-8?q?CogVideoX=E5=A4=9A=E5=8D=A1=E5=B9=B6=E8=A1=8C?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20&=20README=E8=A1=A5=E5=85=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MindIE/MultiModal/CogVideoX/README.md                          | 3 ++-
 .../MultiModal/CogVideoX/cogvideox_5b/utils/parallel_state.py  | 2 +-
 MindIE/MultiModal/CogVideoX/inference.py                       | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/MindIE/MultiModal/CogVideoX/README.md b/MindIE/MultiModal/CogVideoX/README.md
index bf9d6697ba..ff948b45ee 100644
--- a/MindIE/MultiModal/CogVideoX/README.md
+++ b/MindIE/MultiModal/CogVideoX/README.md
@@ -169,7 +169,8 @@ TASK_QUEUE_ENABLE=2 ASCEND_RT_VISIBLE_DEVICES=0 torchrun --master_port=2002 --np
 - num_inference_steps：推理迭代步数，默认值为50。
 - dtype：数据类型，默认值为bfloat16。CogVideoX-2b推荐设置为float16，需要在命令前加INF_NAN_MODE_FORCE_DISABLE=1，开启饱和模式避免数值溢出。
 - seed: 设置随机种子，默认值为42。
-- enable_skip：是否使用采样优化。
+- enable_skip：是否使用采样优化。注意：该优化是有损的加速算法。
+
 推理结束后会在当前路径下生成result.json，用于记录文本提示和生成视频的对应关系，便于测试视频精度。
 
 
diff --git a/MindIE/MultiModal/CogVideoX/cogvideox_5b/utils/parallel_state.py b/MindIE/MultiModal/CogVideoX/cogvideox_5b/utils/parallel_state.py
index 3ca14c2089..a7fa9f9f86 100644
--- a/MindIE/MultiModal/CogVideoX/cogvideox_5b/utils/parallel_state.py
+++ b/MindIE/MultiModal/CogVideoX/cogvideox_5b/utils/parallel_state.py
@@ -265,7 +265,7 @@ def set_parallel(pipe):
         image_embeds = output[:, text_len:, :].reshape(batch, num_frames, -1, output.shape[-1])
 
         text_embeds = split_tensor(text_embeds, -2, get_sp_world_size(), get_sp_group())
-        image_embeds = split_tensor(image_embeds, -2, get_sp_world_size(), get_sp_group(), scale=2)
+        image_embeds = split_tensor(image_embeds, -2, get_sp_world_size(), get_sp_group())
         image_embeds = image_embeds.reshape(batch, -1, image_embeds.shape[-1])
         return torch.cat([text_embeds, image_embeds], dim=1)
 
diff --git a/MindIE/MultiModal/CogVideoX/inference.py b/MindIE/MultiModal/CogVideoX/inference.py
index 3571d9bb8c..cadc94c758 100644
--- a/MindIE/MultiModal/CogVideoX/inference.py
+++ b/MindIE/MultiModal/CogVideoX/inference.py
@@ -101,7 +101,7 @@ def generate_video(
         export_to_video(video_generate, video_path, fps=fps)
         result[os.path.abspath(video_path)] = prompt
 
-    with open('result_2b_46.json', 'w', encoding='utf-8') as json_file:
+    with open('result.json', 'w', encoding='utf-8') as json_file:
         json.dump(result, json_file, ensure_ascii=False, indent=4)
     
     print(f"Result saved to result.json.")
-- 
Gitee