From 80670bd924872e0cd9a146ea54500efab01f26f2 Mon Sep 17 00:00:00 2001 From: taoyuan-guo Date: Fri, 14 Mar 2025 17:19:46 +0800 Subject: [PATCH 1/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../HunyuanVideo/hyvideo/utils/ file_utils.py | 86 ++++++++++ .../HunyuanVideo/hyvideo/utils/__init__.py | 0 .../HunyuanVideo/hyvideo/utils/data_utils.py | 30 ++++ .../HunyuanVideo/hyvideo/utils/helpers.py | 54 +++++++ .../hyvideo/utils/parallel_mgr.py | 151 ++++++++++++++++++ ...preprocess_text_encoder_tokenizer_utils.py | 64 ++++++++ 6 files changed, 385 insertions(+) create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py create mode 100644 MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py new file mode 100644 index 0000000000..e518cb109d --- /dev/null +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/ file_utils.py @@ -0,0 +1,86 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +from pathlib import Path +from einops import rearrange + +import torch +import torchvision +import numpy as np +import imageio + +CODE_SUFFIXES = { + ".py", # Python codes + ".sh", # Shell scripts + ".yaml", + ".yml", # Configuration files +} + + +def safe_dir(path): + """ + Create a directory (and any missing parent directories) if it does not exist. + + Args: + path (str or Path): Path to the directory. + + Returns: + path (Path): Path object of the directory. + """ + path = Path(path) + path.mkdir(exist_ok=True, parents=True) + return path + + +def safe_file(path): + """ + Create the parent directory of a file if it does not exist. + + Args: + path (str or Path): Path to the file. + + Returns: + path (Path): Path object of the file. + """ + path = Path(path) + path.parent.mkdir(exist_ok=True, parents=True) + return path + + +def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24): + """save videos by video tensor + copy from https://github.com/guoyww/AnimateDiff/blob/e92bd5671ba62c0d774a32951453e328018b7c5b/animatediff/utils/util.py#L61 + + Args: + videos (torch.Tensor): video tensor predicted by the model + path (str): path to save video + rescale (bool, optional): rescale the video tensor from [-1, 1] to [0, 1]. Defaults to False. + n_rows (int, optional): Defaults to 1. + fps (int, optional): video save fps. Defaults to 24.
+ """ + videos = rearrange(videos, "b c t h w -> t b c h w") + outputs = [] + for x in videos: + x = torchvision.utils.make_grid(x, nrow=n_rows) + x = x.transpose(0, 1).transpose(1, 2).squeeze(-1) + if rescale: + x = (x + 1.0) / 2.0 # -1,1 -> 0,1 + x = torch.clamp(x, 0, 1) + x = (x * 255).numpy().astype(np.uint8) + outputs.append(x) + + os.makedirs(os.path.dirname(path), exist_ok=True) + imageio.mimsave(path, outputs, fps=fps) \ No newline at end of file diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py new file mode 100644 index 0000000000..de00d80d59 --- /dev/null +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py @@ -0,0 +1,30 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import numpy as np +import math + + +def align_to(value, alignment): + """align height, width according to alignment + + Args: + value (int): height or width + alignment (int): target alignment factor + + Returns: + int: the aligned value + """ + return int(math.ceil(value / alignment) * alignment) \ No newline at end of file diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py new file mode 100644 index 0000000000..f4df023c5a --- /dev/null +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py @@ -0,0 +1,54 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +import collections.abc +from itertools import repeat + + +def _ntuple(n): + def parse(x): + if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): + x = tuple(x) + if len(x) == 1: + x = tuple(repeat(x[0], n)) + return x + return tuple(repeat(x, n)) + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) + + +def as_tuple(x): + if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): + return tuple(x) + if x is None or isinstance(x, (int, float, str)): + return (x,) + else: + raise ValueError(f"Unknown type {type(x)}") + + +def as_list_of_2tuple(x): + x = as_tuple(x) + if len(x) == 1: + x = (x[0], x[0]) + assert len(x) % 2 == 0, f"Expect even length, got {len(x)}." + lst = [] + for i in range(0, len(x), 2): + lst.append((x[i], x[i + 1])) + return lst \ No newline at end of file diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py new file mode 100644 index 0000000000..46c86366e3 --- /dev/null +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py @@ -0,0 +1,151 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import torch +import logging +from typing import List, Optional, Union +import torch.distributed as dist +from yunchang import set_seq_parallel_pg + +logger = logging.getLogger(__name__) + + +def init_distributed_environment( + world_size: int = -1, + rank: int = -1, + distributed_init_method: str = "env://", + local_rank: int = -1, + backend: str = "hccl" +): + logger.debug( + "world_size=%d rank=%d local_rank=%d " "distributed_init_method=%s backend=%s", + world_size, + rank, + local_rank, + distributed_init_method, + backend, + ) + if not torch.distributed.is_initialized(): + assert distributed_init_method is not None, ( + "distributed_init_method must be provided when initializing " + "distributed environment" + ) + # this backend is used for WORLD + torch.distributed.init_process_group( + backend=backend, + init_method=distributed_init_method, + world_size=world_size, + rank=rank, + ) + torch.npu.set_device(rank) + + +def initialize_model_parallel( + data_parallel_degree: int = 1, + classifier_free_guidance_degree: int = 1, + sequence_parallel_degree: int = 1, + ulysses_degree: int = 1, + ring_degree: int = 1, + tensor_parallel_degree: int = 1, + pipeline_parallel_degree: int = 1, + vae_parallel_size: int = 0, + backend: Optional[str] = None, +) -> None: + + assert torch.distributed.is_initialized() + world_size: int = torch.distributed.get_world_size() + backend = backend + + dit_parallel_size = (data_parallel_degree * + classifier_free_guidance_degree * + sequence_parallel_degree * + pipeline_parallel_degree * + tensor_parallel_degree) + + if world_size < dit_parallel_size: + raise RuntimeError( + f"world_size ({world_size}) is less than " + f"tensor_parallel_degree ({tensor_parallel_degree}) x " + f"pipeline_parallel_degree ({pipeline_parallel_degree}) x" + f"sequence_parallel_degree ({sequence_parallel_degree}) x" + f"classifier_free_guidance_degree " + 
f"({classifier_free_guidance_degree}) x" + f"data_parallel_degree ({data_parallel_degree})" + ) + if world_size == 8: + set_seq_parallel_pg( + sp_ulysses_degree=ulysses_degree, + sp_ring_degree=ring_degree, + rank=dist.get_rank(), + world_size=world_size + ) + elif world_size == 16: + set_seq_parallel_pg( + sp_ulysses_degree=ulysses_degree, + sp_ring_degree=ring_degree, + rank=dist.get_rank(), + world_size=world_size, + use_ulysses_low=False + ) + + +def get_sequence_parallel_world_size(): + return dist.get_world_size() + + +def get_sequence_parallel_rank(): + return dist.get_rank() + + +def all_gather( + input_: torch.Tensor, dim: int = 0, separate_tensors: bool = False +) -> Union[torch.Tensor, List[torch.Tensor]]: + world_size = get_sequence_parallel_world_size() + if world_size == 1: + return input_ + assert ( + -input_.dim() <= dim < input_.dim() + ), f"Invalid dim ({dim}) for input tensor with shape {input_.size()}" + if dim < 0: + # Convert negative dim to positive. + dim += input_.dim() + # Allocate output tensor. + input_size = list(input_.size()) + input_size[0] *= world_size + output_tensor = torch.empty( + input_size, dtype=input_.dtype, device=input_.device + ) + # All-gather. 
+ torch.distributed.all_gather_into_tensor( + output_tensor, input_ + ) + if dim != 0: + input_size[0] //= world_size + output_tensor = output_tensor.reshape([world_size, ] + input_size) + output_tensor = output_tensor.movedim(0, dim) + if separate_tensors: + tensor_list = [ + output_tensor.view(-1) + .narrow(0, input_.numel() * i, input_.numel()) + .view_as(input_) + for i in range(world_size) + ] + return tensor_list + else: + input_size = list(input_.size()) + input_size[dim] = input_size[dim] * world_size + # Reshape + output_tensor = output_tensor.reshape(input_size) + return output_tensor \ No newline at end of file diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py new file mode 100644 index 0000000000..923dba211a --- /dev/null +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py @@ -0,0 +1,64 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import argparse +import torch +from transformers import ( + AutoProcessor, + LlavaForConditionalGeneration, +) +import torch +import torch_npu +from torch_npu.contrib import transfer_to_npu + + +def preprocess_text_encoder_tokenizer(args): + + processor = AutoProcessor.from_pretrained(args.input_dir) + model = LlavaForConditionalGeneration.from_pretrained( + args.input_dir, + torch_dtype=torch.float16, + low_cpu_mem_usage=True, + ).to(0) + + model.language_model.save_pretrained( + f"{args.output_dir}" + ) + processor.tokenizer.save_pretrained( + f"{args.output_dir}" + ) + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_dir", + type=str, + required=True, + help="The path to the llava-llama-3-8b-v1_1-transformers.", + ) + parser.add_argument( + "--output_dir", + type=str, + default="", + help="The output path of the llava-llama-3-8b-text-encoder-tokenizer." 
+ "if '', the parent dir of output will be the same as input dir.", + ) + args = parser.parse_args() + + if len(args.output_dir) == 0: + args.output_dir = "/".join(args.input_dir.split("/")[:-1]) + + preprocess_text_encoder_tokenizer(args) \ No newline at end of file -- Gitee From 006a3fe378ed3872a00080c72c7935a658e3c7ac Mon Sep 17 00:00:00 2001 From: taoyuan-guo Date: Fri, 14 Mar 2025 17:49:42 +0800 Subject: [PATCH 2/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../HunyuanVideo/hyvideo/utils/data_utils.py | 2 +- .../hyvideo/utils/parallel_mgr.py | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py index de00d80d59..a34eff9652 100644 --- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================== -import numpy as np import math +import numpy as np def align_to(value, alignment): diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py index 46c86366e3..abce0a38f4 100644 --- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py @@ -13,10 +13,12 @@ # limitations under the License. 
# ============================================================================== -import torch import logging +import torch + from typing import List, Optional, Union import torch.distributed as dist + from yunchang import set_seq_parallel_pg logger = logging.getLogger(__name__) @@ -38,10 +40,11 @@ def init_distributed_environment( backend, ) if not torch.distributed.is_initialized(): - assert distributed_init_method is not None, ( - "distributed_init_method must be provided when initializing " - "distributed environment" - ) + if distributed_init_method is None: + raise ValueError( + "distributed_init_method must be provided when initializing " + "distributed environment" + ) # this backend is used for WORLD torch.distributed.init_process_group( backend=backend, @@ -64,7 +67,9 @@ def initialize_model_parallel( backend: Optional[str] = None, ) -> None: - assert torch.distributed.is_initialized() + if not torch.distributed.is_initialized(): + raise ValueError("Distributed process group has not been initialized") + world_size: int = torch.distributed.get_world_size() backend = backend @@ -115,9 +120,8 @@ def all_gather( world_size = get_sequence_parallel_world_size() if world_size == 1: return input_ - assert ( - -input_.dim() <= dim < input_.dim() - ), f"Invalid dim ({dim}) for input tensor with shape {input_.size()}" + if dim >= input_.dim() or dim < -input_.dim(): + raise ValueError(f"Invalid dim ({dim}) for input tensor with shape {input_.size()}") if dim < 0: # Convert negative dim to positive. 
dim += input_.dim() -- Gitee From d7a5d625343c19c0fd0f4049ffe0d3067d956a8a Mon Sep 17 00:00:00 2001 From: taoyuan-guo Date: Fri, 14 Mar 2025 17:58:33 +0800 Subject: [PATCH 3/3] =?UTF-8?q?hunyuanvideo=E5=88=9D=E7=89=88=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py index f4df023c5a..fe59ad351c 100644 --- a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py +++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py @@ -47,7 +47,8 @@ def as_list_of_2tuple(x): x = as_tuple(x) if len(x) == 1: x = (x[0], x[0]) - assert len(x) % 2 == 0, f"Expect even length, got {len(x)}." + if len(x) % 2 != 0: + raise ValueError(f"Expect even length, got {len(x)}.") lst = [] for i in range(0, len(x), 2): lst.append((x[i], x[i + 1])) -- Gitee