From 80670bd924872e0cd9a146ea54500efab01f26f2 Mon Sep 17 00:00:00 2001
From: taoyuan-guo <guotaoyuan1@h-partners.com>
Date: Fri, 14 Mar 2025 17:19:46 +0800
Subject: [PATCH 1/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../HunyuanVideo/hyvideo/utils/ file_utils.py |  86 ++++++++++
 .../HunyuanVideo/hyvideo/utils/__init__.py    |   0
 .../HunyuanVideo/hyvideo/utils/data_utils.py  |  30 ++++
 .../HunyuanVideo/hyvideo/utils/helpers.py     |  54 +++++++
 .../hyvideo/utils/parallel_mgr.py             | 151 ++++++++++++++++++
 ...preprocess_text_encoder_tokenizer_utils.py |  64 ++++++++
 6 files changed, 385 insertions(+)
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
 create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py

diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py
new file mode 100644
index 0000000000..e518cb109d
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py	
@@ -0,0 +1,86 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from pathlib import Path
+from einops import rearrange
+
+import torch
+import torchvision
+import numpy as np
+import imageio
+
+CODE_SUFFIXES = {
+    ".py",  # Python codes
+    ".sh",  # Shell scripts
+    ".yaml",
+    ".yml",  # Configuration files
+}
+
+
+def safe_dir(path):
+    """
+    Ensure that ``path`` exists as a directory, creating missing parents as needed.
+
+    Args:
+        path (str or Path): Directory to create.
+
+    Returns:
+        Path: ``path`` converted to a ``pathlib.Path``.
+    """
+    directory = Path(path)
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory
+
+
+def safe_file(path):
+    """
+    Ensure that the directory containing ``path`` exists; the file itself is not created.
+
+    Args:
+        path (str or Path): Location of the file.
+
+    Returns:
+        Path: ``path`` converted to a ``pathlib.Path``.
+    """
+    file_path = Path(path)
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+    return file_path
+
+
+def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24):
+    """Tile each frame of a batch of videos into one grid image and save the frames as a video.
+       copy from https://github.com/guoyww/AnimateDiff/blob/e92bd5671ba62c0d774a32951453e328018b7c5b/animatediff/utils/util.py#L61
+
+    Args:
+        videos (torch.Tensor): video tensor predicted by the model, laid out as (b, c, t, h, w)
+        path (str): path to save video; a missing parent directory is created
+        rescale (bool, optional): rescale the video tensor from [-1, 1] to [0, 1]. Defaults to False.
+        n_rows (int, optional): passed to ``make_grid`` as ``nrow``. Defaults to 1.
+        fps (int, optional): video save fps. Defaults to 24.
+    """
+    videos = rearrange(videos, "b c t h w -> t b c h w")
+    outputs = []
+    for x in videos:
+        x = torchvision.utils.make_grid(x, nrow=n_rows)
+        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
+        if rescale:
+            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
+        x = torch.clamp(x, 0, 1)
+        # Tensor.numpy() rejects device tensors and tensors that require grad.
+        outputs.append((x * 255).detach().cpu().numpy().astype(np.uint8))
+    # dirname is "" for a bare file name, and os.makedirs("") raises FileNotFoundError.
+    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+    imageio.mimsave(path, outputs, fps=fps)
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
new file mode 100644
index 0000000000..de00d80d59
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
@@ -0,0 +1,30 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+import math
+
+
+def align_to(value, alignment):
+    """Round ``value`` up to a multiple of ``alignment`` (for a positive ``alignment``).
+
+    Args:
+        value (int): height or width to align
+        alignment (int): target alignment factor
+
+    Returns:
+        int: ``alignment`` times the ceiling of ``value / alignment``
+    """
+    return int(alignment * math.ceil(value / alignment))
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
new file mode 100644
index 0000000000..f4df023c5a
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
@@ -0,0 +1,54 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import collections.abc
+from itertools import repeat
+
+
+def _ntuple(n):
+    """Build a converter that broadcasts a scalar (or 1-element iterable) to an ``n``-tuple."""
+    def parse(x):
+        if isinstance(x, str) or not isinstance(x, collections.abc.Iterable):
+            return (x,) * n  # strings count as scalars, not as character sequences
+        items = tuple(x)
+        # Iterables whose length is not 1 are returned as-is, without any length check.
+        return items * n if len(items) == 1 else items
+    return parse
+
+
+to_1tuple = _ntuple(1)
+to_2tuple = _ntuple(2)
+to_3tuple = _ntuple(3)
+to_4tuple = _ntuple(4)
+
+
+def as_tuple(x):
+    """Return ``x`` as a tuple: iterables are converted, None/int/float/str are wrapped."""
+    if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
+        return tuple(x)
+    if not (x is None or isinstance(x, (int, float, str))):
+        raise ValueError(f"Unknown type {type(x)}")
+    return (x,)
+
+
+def as_list_of_2tuple(x):
+    x = as_tuple(x)
+    if len(x) == 1:
+        x = (x[0], x[0])
+    assert len(x) % 2 == 0, f"Expect even length, got {len(x)}."
+    lst = []
+    for i in range(0, len(x), 2):
+        lst.append((x[i], x[i + 1]))
+    return lst
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
new file mode 100644
index 0000000000..46c86366e3
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
@@ -0,0 +1,151 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import torch
+import logging
+from typing import List, Optional, Union
+import torch.distributed as dist
+from yunchang import set_seq_parallel_pg
+
+logger = logging.getLogger(__name__)
+
+
+def init_distributed_environment(
+    world_size: int = -1,
+    rank: int = -1,
+    distributed_init_method: str = "env://",
+    local_rank: int = -1,
+    backend: str = "hccl"
+):
+    """Initialise the default process group if needed and bind this process to an NPU.
+
+    ``local_rank`` selects the device when given (>= 0); otherwise ``rank`` is used, as before.
+    """
+    logger.debug(
+        "world_size=%d rank=%d local_rank=%d distributed_init_method=%s backend=%s",
+        world_size, rank, local_rank, distributed_init_method,
+        backend,
+    )
+    if not torch.distributed.is_initialized():
+        assert distributed_init_method is not None, (
+            "distributed_init_method must be provided when initializing "
+            "distributed environment"
+        )
+        # this backend is used for WORLD
+        torch.distributed.init_process_group(
+            backend=backend,
+            init_method=distributed_init_method, world_size=world_size, rank=rank,
+        )
+    # A global rank is not a valid device index on nodes after the first in a multi-node job.
+    torch.npu.set_device(local_rank if local_rank >= 0 else rank)
+
+
+def initialize_model_parallel(
+    data_parallel_degree: int = 1,
+    classifier_free_guidance_degree: int = 1,
+    sequence_parallel_degree: int = 1,
+    ulysses_degree: int = 1,
+    ring_degree: int = 1,
+    tensor_parallel_degree: int = 1,
+    pipeline_parallel_degree: int = 1,
+    vae_parallel_size: int = 0,
+    backend: Optional[str] = None,
+) -> None:
+
+    assert torch.distributed.is_initialized()
+    world_size: int = torch.distributed.get_world_size()
+    backend = backend
+
+    # Ranks required by the requested layout; ulysses/ring degrees are not part of this product.
+    dit_parallel_size = (data_parallel_degree *
+                     classifier_free_guidance_degree *
+                     sequence_parallel_degree *
+                     pipeline_parallel_degree *
+                     tensor_parallel_degree)
+
+    if world_size < dit_parallel_size:
+        raise RuntimeError(
+            f"world_size ({world_size}) is less than "
+            f"tensor_parallel_degree ({tensor_parallel_degree}) x "
+            f"pipeline_parallel_degree ({pipeline_parallel_degree}) x "
+            f"sequence_parallel_degree ({sequence_parallel_degree}) x "
+            f"classifier_free_guidance_degree "
+            f"({classifier_free_guidance_degree}) x "
+            f"data_parallel_degree ({data_parallel_degree})"
+        )
+    # NOTE(review): sequence-parallel process groups are only created when exactly 8 or 16
+    # ranks are running; every other world size falls through without calling
+    # set_seq_parallel_pg - confirm that this is intended.
+    if world_size == 8:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree, sp_ring_degree=ring_degree,
+            rank=dist.get_rank(), world_size=world_size
+        )
+    elif world_size == 16:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree, sp_ring_degree=ring_degree,
+            rank=dist.get_rank(), world_size=world_size,
+            use_ulysses_low=False
+        )
+
+
+def get_sequence_parallel_world_size():
+    return dist.get_world_size()  # NOTE(review): size of the default group, not of an SP sub-group - confirm
+
+
+def get_sequence_parallel_rank():
+    return dist.get_rank()  # NOTE(review): rank in the default group, not in an SP sub-group - confirm
+
+
+def all_gather(
+    input_: torch.Tensor, dim: int = 0, separate_tensors: bool = False
+) -> Union[torch.Tensor, List[torch.Tensor]]:
+    """Gather ``input_`` from all ranks: concatenated along ``dim``, or, when
+    ``separate_tensors`` is True, as a list holding one ``input_``-shaped tensor per rank.
+    """
+    world_size = get_sequence_parallel_world_size()
+    if world_size == 1:
+        return input_
+    assert (
+        -input_.dim() <= dim < input_.dim()
+    ), f"Invalid dim ({dim}) for input tensor with shape {input_.size()}"
+    if dim < 0:
+        # Convert negative dim to positive.
+        dim += input_.dim()
+    # Allocate output tensor.
+    input_size = list(input_.size())
+    input_size[0] *= world_size
+    output_tensor = torch.empty(input_size, dtype=input_.dtype, device=input_.device)
+    # All-gather.
+    torch.distributed.all_gather_into_tensor(output_tensor, input_)
+    if separate_tensors:
+        # Split the raw gather buffer, in which rank i owns the i-th contiguous chunk. Doing this
+        # after movedim would call view(-1) on a non-contiguous tensor when dim != 0.
+        tensor_list = [
+            output_tensor.view(-1)
+            .narrow(0, input_.numel() * i, input_.numel())
+            .view_as(input_)
+            for i in range(world_size)
+        ]
+        return tensor_list
+    if dim != 0:
+        input_size[0] //= world_size
+        output_tensor = output_tensor.reshape([world_size, ] + input_size)
+        output_tensor = output_tensor.movedim(0, dim)
+    input_size = list(input_.size())
+    input_size[dim] = input_size[dim] * world_size
+    # Reshape
+    output_tensor = output_tensor.reshape(input_size)
+    return output_tensor
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
new file mode 100644
index 0000000000..923dba211a
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
@@ -0,0 +1,64 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import argparse
+import torch
+from transformers import (
+    AutoProcessor,
+    LlavaForConditionalGeneration,
+)
+import torch
+import torch_npu
+from torch_npu.contrib import transfer_to_npu
+
+
+def preprocess_text_encoder_tokenizer(args):
+    """Split a LLaVA checkpoint into its language model and tokenizer and save both.
+
+    Loads the processor and the fp16 model from ``args.input_dir`` (the model is moved to
+    device 0), then writes ``model.language_model`` and the processor's tokenizer to
+    ``args.output_dir``.
+    """
+    processor = AutoProcessor.from_pretrained(args.input_dir)
+    load_kwargs = dict(torch_dtype=torch.float16, low_cpu_mem_usage=True)
+    model = LlavaForConditionalGeneration.from_pretrained(args.input_dir, **load_kwargs)
+    model = model.to(0)
+
+    target_dir = f"{args.output_dir}"
+    model.language_model.save_pretrained(target_dir)
+    processor.tokenizer.save_pretrained(target_dir)
+
+if __name__ == "__main__":
+    import os  # local import: only this CLI entry point needs it
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_dir",
+        type=str,
+        required=True,
+        help="The path to the llava-llama-3-8b-v1_1-transformers.",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="",
+        help="The output path of the llava-llama-3-8b-text-encoder-tokenizer. "
+        "If '', the parent dir of output will be the same as input dir.",
+    )
+    args = parser.parse_args()
+
+    if not args.output_dir:
+        # normpath drops a trailing "/"; a path without a parent part falls back to ".".
+        args.output_dir = os.path.dirname(os.path.normpath(args.input_dir)) or "."
+    preprocess_text_encoder_tokenizer(args)
\ No newline at end of file
-- 
Gitee


From 006a3fe378ed3872a00080c72c7935a658e3c7ac Mon Sep 17 00:00:00 2001
From: taoyuan-guo <guotaoyuan1@h-partners.com>
Date: Fri, 14 Mar 2025 17:49:42 +0800
Subject: [PATCH 2/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../HunyuanVideo/hyvideo/utils/data_utils.py  |  2 +-
 .../hyvideo/utils/parallel_mgr.py             | 22 +++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
index de00d80d59..a34eff9652 100644
--- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 
-import numpy as np
 import math
+import numpy as np
 
 
 def align_to(value, alignment):
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
index 46c86366e3..abce0a38f4 100644
--- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 
-import torch
 import logging
+import torch
+
 from typing import List, Optional, Union
 import torch.distributed as dist
+
 from yunchang import set_seq_parallel_pg
 
 logger = logging.getLogger(__name__)
@@ -38,10 +40,11 @@ def init_distributed_environment(
         backend,
     )
     if not torch.distributed.is_initialized():
-        assert distributed_init_method is not None, (
-            "distributed_init_method must be provided when initializing "
-            "distributed environment"
-        )
+        if distributed_init_method is None:
+            raise ValueError(
+                "distributed_init_method must be provided when initializing "
+                "distributed environment"
+            )
         # this backend is used for WORLD
         torch.distributed.init_process_group(
             backend=backend,
@@ -64,7 +67,9 @@ def initialize_model_parallel(
     backend: Optional[str] = None,
 ) -> None:
 
-    assert torch.distributed.is_initialized()
+    if not torch.distributed.is_initialized():
+        raise ValueError("Distributed process group has not been initialized")
+    
     world_size: int = torch.distributed.get_world_size()
     backend = backend
 
@@ -115,9 +120,8 @@ def all_gather(
     world_size = get_sequence_parallel_world_size()
     if world_size == 1:
         return input_
-    assert (
-        -input_.dim() <= dim < input_.dim()
-    ), f"Invalid dim ({dim}) for input tensor with shape {input_.size()}"
+    if dim >= input_.dim() or dim < -input_.dim():
+        raise ValueError(f"Invalid dim ({dim}) for input tensor with shape {input_.size()}")
     if dim < 0:
         # Convert negative dim to positive.
         dim += input_.dim()
-- 
Gitee


From d7a5d625343c19c0fd0f4049ffe0d3067d956a8a Mon Sep 17 00:00:00 2001
From: taoyuan-guo <guotaoyuan1@h-partners.com>
Date: Fri, 14 Mar 2025 17:58:33 +0800
Subject: [PATCH 3/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
index f4df023c5a..fe59ad351c 100644
--- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
@@ -47,7 +47,8 @@ def as_list_of_2tuple(x):
     x = as_tuple(x)
     if len(x) == 1:
         x = (x[0], x[0])
-    assert len(x) % 2 == 0, f"Expect even length, got {len(x)}."
+    if len(x) % 2 != 0:
+        raise ValueError(f"Expect even length, got {len(x)}.")
     lst = []
     for i in range(0, len(x), 2):
         lst.append((x[i], x[i + 1]))
-- 
Gitee