diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..023869fa0adbf04bb38e73feb28298d42d0f7f6c
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 3d_nested_unet_postprocess.py
+import os
+import argparse
+from nnunet.inference import predict_simple2
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-fp', '--file_path', help='output bin files path', required=True)
+    args = parser.parse_args()
+    # delegate to the patched predict_simple2; pre_mode=2 merges the output .bin files
+    python_file = predict_simple2.__file__  # e.g. /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py
+    command = 'python3 ' + str(python_file) + ' --pre_mode 2 --file_path ' + str(args.file_path)
+    os.system(command)
+
+
+if __name__ == "__main__":
+    main()
+    print('main end')
+
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..143fb939b116e7d44c7dc316584fe84b9156a3f2
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 3d_nested_unet_preprocess.py
+import os
+import argparse
+from nnunet.inference import predict_simple2
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-fp', '--file_path', help='input bin files path', required=True)
+    args = parser.parse_args()
+    # delegate to the patched predict_simple2; pre_mode=1 slices the input images into .bin files
+    python_file = predict_simple2.__file__  # e.g. /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py
+    command = 'python3 ' + str(python_file) + ' --pre_mode 1 --file_path ' + str(args.file_path)
+    os.system(command)
+
+
+if __name__ == "__main__":
+    main()
+    print('main end')
+
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d8e115f5bd1c2f6919069e2ed6807b4cf7819bb
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py
@@ -0,0 +1,48 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 3d_nested_unet_pth2onnx.py
+import argparse
+from batchgenerators.utilities.file_and_folder_operations import join
+from nnunet.paths import network_training_output_dir
+from nnunet.training.model_restore import load_model_and_checkpoint_files
+from nnunet.inference.predict2 import pth2onnx
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-fp', '--file_path', help='output onnx file path', required=True)
+    args = parser.parse_args()
+    fp = args.file_path
+    model = '3d_fullres'
+    task_name = 'Task003_Liver'
+    trainer_name = 'nnUNetPlusPlusTrainerV2'
+    plans_identifier = 'nnUNetPlansv2.1'
+    model_folder_name = join(network_training_output_dir, model, task_name, trainer_name + "__" + plans_identifier)
+    folds = None  # fold 0 from the tutorial is detected automatically if the files are in the right place
+    checkpoint_name = 'model_final_checkpoint'
+    trainer, params = load_model_and_checkpoint_files(model_folder_name, folds, mixed_precision=True,
+                                                      checkpoint_name=checkpoint_name)
+    # trainer is an nnUNetPlusPlusTrainerV2; the actual method lives in network_trainer
+    trainer.load_checkpoint_ram(params[0], False)
+    print('pth2onnx start')
+    pth2onnx(trainer.network, fp)
+    print('pth2onnx end')
+    print('ONNX model written to:', fp)
+
+
+if __name__ == "__main__":
+    main()
+    print('main end')
+
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License
new file mode 100644
index 0000000000000000000000000000000000000000..eeac88fb9dc15a1427b41173cf5f136327230c49
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/README.md b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9e486602d5b65f382eaa4ca05032652c462665d2
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/README.md
@@ -0,0 +1,409 @@
+# 3D_Nested_Unet PyTorch Offline Inference Guide
+
+**Files covered by this guide**
+```
+Inference tools
+├── benchmark.aarch64             // offline inference tool (ARM); you may have to build it yourself
+├── benchmark.x86_64              // offline inference tool (x86); you may have to build it yourself
+Scripts
+├── set_env.sh                    // NPU environment variables
+├── clear2345.sh                  // cleans up redundant files and gathers results
+├── gen_dataset_info.py           // generates the info file describing the binary dataset
+├── 3d_nested_unet_pth2onnx.py    // exports the ONNX model file
+├── 3d_nested_unet_preprocess.py  // preprocessing: generates the input bin files
+├── 3d_nested_unet_postprocess.py // postprocessing: merges the output bins into the inference result
+├── onnx_infer.py                 // measures GPU performance
+├── change_infer_path.py          // rewrites the experiment paths
+Model and weight files (the model files are large and have most likely been removed from this repo)
+├── nnunetplusplus.onnx           // ONNX model file
+├── nnunetplusplus.om             // OM model file
+Other files
+├── README.md                     // quick-start guide; largely the same content as this document
+├── new.patch                     // patch that modifies the original source code
+├── requirements.txt              // dependencies, generated with pip freeze > re.txt
+Weight folder download_models (may be packaged and hosted elsewhere; please download it in advance)
+├── Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/fold_0/*   // weight files
+├── Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/plans.pkl  // experiment plan file
+Backup folder backup (may be packaged and hosted elsewhere; please download it in advance)
+├── nnUNet_preprocessed/          // experiment configuration files to copy over
+├── output-gpu/                   // complete GPU inference results, including GPU accuracy
+├── output-npu/                   // complete NPU inference results, including NPU accuracy
+├── nnunetplusplus_prep_bin.info  // info file generated for image No. 11 of MSD Task03
+├── perf_vision_batchsize_1_device_0.txt // NPU performance result
+└── perf_T4gpu_batchsize_1.txt    // GPU performance result
+```
+**Key environment:**
+| Dependency | Version |
+| :------: | :------: |
+| CANN | 5.1.RC1.alpha001 |
+| CANN (only for the atc OM conversion) | 5.0.3 / 5.1.RC1.alpha001 |
+| CANN (all steps other than atc) | 5.0.3 / 5.0.4 / 5.1.RC1.alpha001 |
+| python | ==3.7.5 |
+| torch | >=1.6.0 (CPU build is sufficient) |
+| batchgenerators | ==0.21 |
+| numpy | no specific version required |
+| pandas | no specific version required |
+| pillow | no specific version required |
+| SimpleITK | no specific version required |
+| scikit-image | no specific version required |
+| other dependencies (see the steps below) | unspecified |
+
+**Related links:**
+| Name and link | Notes |
+| :------: | :------: |
+| [UNET official repo](https://github.com/MIC-DKFZ/nnUNet) | The official UNET framework. |
+| [UNET++ official repo](https://github.com/MrGiovanni/UNetPlusPlus/tree/master/pytorch) | The official UNET++ code, developed on top of the official UNET framework. |
+| [MSD dataset (Medical Segmentation Decathlon)](http://medicaldecathlon.com/) | The Medical Segmentation Decathlon contains 10 sub-tasks; this guide only evaluates task 3 (liver). All images are 3D grayscale; you can visualize them with ITK-SNAP. |
+| [ITK-SNAP](http://www.itksnap.org/pmwiki/pmwiki.php) | 3D image visualization tool. |
+| [UNET++ model weights](https://github.com/MrGiovanni/UNetPlusPlus/tree/master/pytorch) | Model weights provided by the UNET++ authors; the link is in the "How to use UNet++" section of the official repo. |
+| download_models weight folder | The weights used in this guide, containing only fold_0 and plans.pkl. If no link is available, download the weights provided by the UNET++ authors. |
+| backup folder | The experiment configuration files used in this guide. Located at: obs://ascend-pytorch-model-file/验收-推理/cv/segmentation/3D_Nested_Unet/实验配置文件、推理结果、性能参考文件/ |
+| [benchmark tool](https://gitee.com/ascend/cann-benchmark/tree/master/infer) | Executable needed for inference on the 310. The newer msame tool may also work. |
+## 1 Environment Setup
+
+### 1.1 Get the source code
+Clone the official repository and roll it back to a pinned revision so the code stays stable. The tutorial below matches the model inference guide.
+```
+cd /home/hyp/
+git clone https://github.com/MrGiovanni/UNetPlusPlus.git
+cd UNetPlusPlus
+git reset e145ba63862982bf1099cf2ec11d5466b434ae0b --hard
+```
+
+### 1.2 Install dependencies and patch the model code
+```
+cd /home/hyp/UNetPlusPlus/
+patch -p1 < ../new.patch  # apply the source-code patch
+cd pytorch
+pip install -e .
+pip install batchgenerators==0.21  # this dependency is critical; reinstall it manually with the pinned version
+
+# You can also install the dependencies from requirements.txt, but we do not recommend that method
+pip install -r requirements.txt
+```
+The last argument of the patch command must point to the new.patch file in this repo. Because the model must register its commands into the environment before the correct entry points can be found, the pip step is still required. In addition, "pip install -e ." must be rerun every time code files are added or removed on a large scale, otherwise an "import nnunet" error is very likely.
+
+We do not recommend installing the environment from requirements.txt, because that path easily skips the nnunet registration step and blocks the rest of the experiment.
+
+Note: if "pip install -e ." or a later step still fails to install or import a package or module, you will most likely have to reinstall some packages by hand. In our view the original authors did not fully specify the required dependencies, and those hidden dependencies have since moved through several releases, changing the relationships between packages, so following the authors' instructions verbatim is no longer workable. Across several servers, the packages we have seen misbehave include, but are not limited to:
+ - torch (CPU build is sufficient)
+ - decorator
+ - sympy
+ - SimpleITK
+ - matplotlib
+ - batchgenerators==0.21
+ - pandas
+ - scikit-image
+ - sklearn
+ - nibabel
+
+Each of the packages above has failed to install at least twice across different server environments. Manually reinstalling the package named in the error message, e.g. "pip install batchgenerators==0.21", or switching to another mirror, usually fixes the problem. A second option is installing from offline whl packages. If the problem persists, the underlying system is probably too old, e.g. its GLIBC.
+
+### 1.3 Prepare the dataset and environment
+This model is a secondary development on top of the [official UNET repo](https://github.com/MIC-DKFZ/nnUNet). Following the UNET documentation, the overall pipeline is "data format conversion -> data preprocessing -> training -> validation -> inference", and no stage can be skipped, because each stage depends on the output of the previous one. You could set up the dataset by following the official instructions, but they are tedious. Below we describe the core steps and caveats, and where necessary we provide intermediate result files that let us skip some steps.
+
+#### 1.3.1 Set the nnunet environment variables
+Following the UNET documentation, pick a path with ample disk space, /home/hyp/ in our example, and create a folder named environment there for the experiment data; it does not have to live next to the project. Inside environment create three subfolders: nnUNet_raw_data_base, nnUNet_preprocessed and RESULTS_FOLDER. They do not have to share a parent directory and may even sit on different disks, but for easy lookup we recommend keeping them together, e.g. under environment. Make sure that path (i.e. environment) has at least 400 GB of free space.
+```
+cd environment
+mkdir nnUNet_raw_data_base
+mkdir nnUNet_preprocessed
+mkdir RESULTS_FOLDER
+```
+Finally, append the following environment variables to /root/.bashrc. Every new session will then import them automatically, with no manual export needed.
+```
+export nnUNet_raw_data_base="/home/hyp/environment/nnUNet_raw_data_base"
+export nnUNet_preprocessed="/home/hyp/environment/nnUNet_preprocessed"
+export RESULTS_FOLDER="/home/hyp/environment/RESULTS_FOLDER"
+```
+Refresh the environment variables with source. If you would rather not touch .bashrc, you can type the three export lines above directly in your current session, but they only last for that session.
+```
+source ~/.bashrc
+```
+Note: we strongly recommend putting these folders on an SSD. On a mechanical disk we have observed the model consuming large amounts of IO and making the system sluggish. If you also want to use the GPUs available on your machine, add the following variable as well.
+```
+# make GPUs 0 through 3 visible (four-card setup)
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+```
+
+#### 1.3.2 Get the dataset
+Download the [Medical Segmentation Decathlon](http://medicaldecathlon.com/), take the third sub-task archive Task03_Liver.tar, put it under the environment directory (from here on, environment always means /home/hyp/environment/) and unpack it. Task03_Liver is cropped and unpacked during later steps and occupies about 260 GB while in use.
+```
+# check remaining disk space
+df -h
+# move and unpack the dataset
+mv ./Task03_Liver.tar /home/hyp/environment/
+cd /home/hyp/environment/
+tar xvf Task03_Liver.tar
+```
+At this point the environment folder should look as follows, matching the environment variable paths set in .bashrc in the previous section:
+```
+environment/
+├── nnUNet_preprocessed/
+├── nnUNet_raw_data_base/
+├── RESULTS_FOLDER/
+├── Task03_Liver/
+└── Task03_Liver.tar
+```
+
+#### 1.3.3 Convert the data format
+Inside the environment folder, use the nnunet command-line script to convert the data in the unpacked Task03_Liver folder. The script runs for about 5 minutes and writes its output into the nnUNet_raw_data_base subfolder.
+```
+nnUNet_convert_decathlon_task -i Task03_Liver -p 8
+```
+If your machine is slow or the command does not finish for a long time, lower the -p value; the run will take longer.
+
+Note: if you later want to reset the experiment, or the dataset runs into serious problems (e.g. EOF or other IO errors while reading), delete everything under nnUNet_preprocessed, nnUNet_raw_data_base and RESULTS_FOLDER and redo the steps from this section onwards.
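+
+Before moving on, a quick optional sanity check can confirm the conversion actually landed where later steps expect it. This is only a sketch: it assumes the nnUNet_raw_data_base variable from section 1.3.1 and the Task003_Liver naming used in the rest of this guide.
+```
+# Hypothetical sanity check: list the converted task folder.
+import os
+
+base = os.environ['nnUNet_raw_data_base']  # set in section 1.3.1
+task = os.path.join(base, 'nnUNet_raw_data', 'Task003_Liver')
+print(os.listdir(task))  # imagesTr, imagesTs and labelsTr should appear here after conversion
+```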
+
+#### 1.3.4 Experiment planning and preprocessing
+nnunet is heavily dataset-driven: this step extracts dataset properties such as image sizes and voxel spacings, and generates the configuration files for all later steps. Adding or removing dataset images changes that configuration. Use the nnunet script to collect information for task 003 under nnUNet_raw_data_base. The run lasts between half an hour and six hours depending on the machine, and the results appear in the nnUNet_preprocessed subfolder.
+```
+nnUNet_plan_and_preprocess -t 003 --verify_dataset_integrity
+```
+We have observed that this step can abort unexpectedly without telling the user; this happens at random on machines with little RAM, so make sure the run actually completes. If your machine is weak or the run does not finish after a long time, you can use the command below to lower the system load, at the cost of a markedly longer runtime. As a rule of thumb, run free -m; if the reported available Mem is at or around 30000 or below, we recommend the command below.
+```
+nnUNet_plan_and_preprocess -t 003 --verify_dataset_integrity -tl 1 -tf 1
+```
+Note: if a later step fails with something like "RuntimeError: Expected index [2, 1, 128, 128, 128] to be smaller than self [2, 3, 8, 8, 8] apart from dimension 1", delete everything under environment/nnUNet_preprocessed/Task003_Liver/ and environment/nnUNet_raw_data_base/nnUNet_cropped_data/ and redo this section.
+
+#### 1.3.5 Copy the experiment configuration
+The dataset split produced by nnunet's planning and preprocessing is random, so to keep later steps reproducible we provide supporting material in the backup folder, including a ready-made configuration that fixes the training/validation split. Copy these files over into environment.
+
+Note: check first: if the files under backup/nnUNet_preprocessed/ are in .json format, rename them to .pkl (keeping the base names) before copying.
+```
+# copy the experiment plan .pkl and the dataset split .pkl into environment
+cp -rf /home/hyp/backup/nnUNet_preprocessed /home/hyp/environment/
+```
+Inside environment create a subfolder named input for the images to be inferred, and an output folder for the model's inference output; do not put unrelated files in either folder.
+```
+cd environment
+mkdir input output
+```
+splits_final.pkl stores the dataset split; the 27 validation images are numbered as listed below. Copy these validation images (stored in nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTr/) into the input folder as the images to infer, using create_testset.py. You can also point it at any folder you want to infer.
+```
+# raw image file names look like liver_3_0000.nii.gz, liver_128_0000.nii.gz
+# validation image numbers: 3, 5, 11, 12, 17, 19, 24, 25, 27, 38, 40, 41, 42, 44, 51, 52, 58, 64, 70, 75, 77, 82, 101, 112, 115, 120, 128
+cd /home/hyp/UNetPlusPlus/pytorch/nnunet/inference
+python create_testset.py /home/hyp/environment/input/
+```
+Note: this step differs from the official UNET tutorial, which infers the test-set images under nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/; this guide uses the validation set instead.
+
+#### 1.3.6 Get the weight files
+The model uses five-fold cross-validation, so the authors' pretrained weights come as five folders, one per fold. In our measurements the folds differ in accuracy by less than about 1%, so to save compute the whole guide only uses the results of fold 0 (the first cross-validation split).
+
+Download the pretrained [model weights (download models)](https://github.com/MrGiovanni/UNetPlusPlus/tree/master/pytorch), create a subfolder download_models under environment for the archive, and unpack it to obtain five folders and one plan file: fold_0, fold_1, fold_2, fold_3, fold_4, plans.pkl.
+
+A separate archive containing only fold_0 and plans.pkl may be provided with this guide in the future; use it if available.
+
+Copy fold_0 and plans.pkl into environment/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/, creating the subfolders in advance. This simulates a completed training run.
+```
+cd environment
+cp -rf download_models/* /home/hyp/environment/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/
+```
+The final layout is:
+```
+environment/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/
+├── fold_0/
+│   ├── ...
+│   ├── model_final_checkpoint.model
+│   ├── model_final_checkpoint.model.pkl
+│   └── ...
+└── plans.pkl
+```
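+
+To verify the checkpoint landed where the loader expects it, a small illustrative check (paths as in the layout above) can be run:
+```
+# Hypothetical check that the fold 0 checkpoint and plan file are in place.
+import os
+
+folder = os.path.join(os.environ['RESULTS_FOLDER'],
+                      'nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1')
+for f in ('fold_0/model_final_checkpoint.model',
+          'fold_0/model_final_checkpoint.model.pkl',
+          'plans.pkl'):
+    print(f, os.path.isfile(os.path.join(folder, f)))
+```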
+
+#### 1.3.7 Set the inference paths
+The inference steps keep reusing several path arguments, which is very easy to mix up. Since the nnunet environment variables are already set, the model's paths can be considered stable, so to make later steps easier we bake the paths into the program as defaults. Use change_infer_path.py for this, passing three absolute paths (the three fp arguments must not point to the same directory).
+```
+python change_infer_path.py -fp1 INFERENCE_INPUT_FOLDER -fp2 INFERENCE_OUTPUT_FOLDER -fp3 INFERENCE_SHAPE_PATH
+# e.g.: python change_infer_path.py -fp1 /home/hyp/environment/input/ -fp2 /home/hyp/environment/output/ -fp3 /home/hyp/environment/
+```
+The three path arguments are described below; we recommend pointing them inside the environment folder for easy lookup:
+ - INFERENCE_INPUT_FOLDER: the folder holding the images to be inferred (created in section 1.3.5).
+ - INFERENCE_OUTPUT_FOLDER: the folder that receives the inference results (created in section 1.3.5).
+ - INFERENCE_SHAPE_PATH: the directory holding the file all_shape.txt. It is introduced in later steps: an all_shape.txt is generated there, storing properties of the images currently being inferred. It is an intermediate result file and you never need to inspect it.
+
+Finally, open UNetPlusPlus/pytorch/nnunet/inference/infer_path.py in the project code to check the result; after the change it should look like this:
+```
+# the next two entries are fixed legacy values left over from an earlier revision of the requirements; keep them None
+INFERENCE_BIN_INPUT_FOLDER = None
+INFERENCE_BIN_OUTPUT_FOLDER = None
+
+# the next three entries are the paths written by change_infer_path.py
+INFERENCE_INPUT_FOLDER = '/home/hyp/environment/input/'
+INFERENCE_OUTPUT_FOLDER = '/home/hyp/environment/output/'
+INFERENCE_SHAPE_PATH = '/home/hyp/environment/'
+```
+Note: this may be the first time you open the project code. If a file contains Chinese characters, your editor may show mojibake; switch the encoding to UTF-8.
+
+#### 1.3.8 Copy the inference results
+Inference covers the 27 validation images the model has never trained on; in practice a full run on the NPU takes 2-4 days. Because the process is so tedious, we additionally provide a file with the complete inference results under the fold 0 setting, including the results of the full inference pipeline on the NPU. The following sections use image No. 11 as the example for single-image inference; every other image follows the same procedure, which is how the complete results can be reproduced. The validation image numbers are listed below; copy the NPU results from backup/output-npu/ into INFERENCE_OUTPUT_FOLDER (set to /home/hyp/environment/output/ in section 1.3.7).
+```
+# result file names look like liver_5.nii.gz, liver_112.nii.gz
+# image numbers, as in section 1.3.5: 3, 5, 11, 12, 17, 19, 24, 25, 27, 38, 40, 41, 42, 44, 51, 52, 58, 64, 70, 75, 77, 82, 101, 112, 115, 120, 128
+cp -rf /home/hyp/backup/output-npu/* /home/hyp/environment/output/
+```
+Note: summary.json under output-npu and output-gpu holds the accuracy results of the whole experiment on NPU and GPU, for reference only; section 2.9 replaces it with a fresh evaluation. If you find a plans.json file, rename its extension to .pkl.
+
+### 1.4 Get the [benchmark tool](https://gitee.com/ascend/cann-benchmark/tree/master/infer)
+Put the prebuilt benchmark.x86_64 or benchmark.aarch64 in the current working directory. Use the command below to find out whether your system is x86 or aarch:
+```
+uname -a
+```
+
+## 2 Offline Inference
+
+### 2.1 Generate the OM model
+The key offline-inference steps use the programs below; each takes a user-supplied path argument --file_path:
+ - 3d_nested_unet_pth2onnx.py: conversion mode. Loads the pretrained model and converts it to ONNX; --file_path is the output ONNX file.
+ - 3d_nested_unet_preprocess.py: split mode. Preprocessing: slices the images to be inferred in INFERENCE_INPUT_FOLDER (set to /home/hyp/environment/input/ in 1.3.7) into sub-volumes and writes a batch of input .bin files into --file_path.
+ - 3d_nested_unet_postprocess.py: merge mode. Postprocessing: merges the output .bin files under --file_path into the inference result, which is written to INFERENCE_OUTPUT_FOLDER (set to /home/hyp/environment/output/ in 1.3.7).
+
+First load the pretrained weights into the model and convert it to ONNX; the output is a nnunetplusplus.onnx at the given path, placed under environment for now.
+```
+python 3d_nested_unet_pth2onnx.py --file_path /home/hyp/environment/nnunetplusplus.onnx
+```
+Note: the first run of this program takes noticeably longer than usual.
+
+Next convert the ONNX model to an OM model. Check the device state with npu-smi info first, and once the device is idle run the command below. It builds a batch-size-1 OM model: the input is nnunetplusplus.onnx, the output is named nnunetplusplus, producing nnunetplusplus.om in the current directory; --input_format and --input_shape describe the model's input layout and size.
+```
+cd environment
+atc --framework=5 --model=nnunetplusplus.onnx --output=nnunetplusplus --input_format=NCDHW --input_shape="image:1,1,128,128,128" --log=debug --soc_version=Ascend310
+```
+Note: we have seen the atc command succeed on CANN 5.0.3 but fail on CANN 5.0.4 with: RuntimeError: ({'errCode': 'E90003', 'detailed_cause': 'tuple_reduce_sum not support'}, 'Compile operator failed, cause: Template constraint, detailed information: tuple_reduce_sum not support.'). It succeeds again on CANN 5.1.RC1.alpha001.
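+
+For orientation, the export performed by 3d_nested_unet_pth2onnx.py boils down to a torch.onnx.export call along the lines of the sketch below. This is a sketch only: the real implementation is the patched pth2onnx in nnunet/inference/predict2.py, and the output name and opset here are assumptions. The 'image' input name and the 1x1x128x128x128 NCDHW shape must match the atc --input_shape above.
+```
+import torch
+
+def export_to_onnx(network, onnx_path):
+    # network is the loaded model (assumed on CPU); dummy input matches atc --input_shape
+    network.eval()
+    dummy = torch.randn(1, 1, 128, 128, 128)
+    torch.onnx.export(network, dummy, onnx_path,
+                      input_names=['image'],
+                      output_names=['output'],  # assumed name
+                      opset_version=11)         # assumed opset
+```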
+
+### 2.2 Delete the result file of the image to be inferred
+In essence, when an input image exists in INFERENCE_INPUT_FOLDER (set to /home/hyp/environment/input/ in 1.3.7) but its result is missing from INFERENCE_OUTPUT_FOLDER (set to /home/hyp/environment/output/ in 1.3.7), the difference between the two sets is what the model has to infer. The model then picks one not-yet-inferred image at random, the randomness coming from the IO rates of the competing processes.
+
+Therefore, deleting the result file of a given image number from INFERENCE_OUTPUT_FOLDER triggers one inference pass over that image. Delete result No. 11 from the output folder INFERENCE_OUTPUT_FOLDER to simulate that the other 26 images are already done and inference of image No. 11 is about to start.
+```
+# all validation image numbers: 3, 5, 11, 12, 17, 19, 24, 25, 27, 38, 40, 41, 42, 44, 51, 52, 58, 64, 70, 75, 77, 82, 101, 112, 115, 120, 128
+rm /home/hyp/environment/output/liver_11.nii.gz
+```
+To infer other images, delete their numbered result files from INFERENCE_OUTPUT_FOLDER so the difference with INFERENCE_INPUT_FOLDER is non-empty. We recommend inferring only one image at a time; otherwise you cannot tell exactly which image the model is currently working on or how far along it is, and a large difference set can occupy more disk space than expected.
+
+### 2.3 Preprocess: slice sub-volumes and generate the input bin files
+Following the UNET pipeline, one image to be inferred is sliced into 1000 to 4000 sub-volumes, which must be stored as .bin files in a chosen directory, tentatively environment/input_bins. Run 3d_nested_unet_preprocess.py with --file_path set to the directory that should receive the input bin files; create that folder yourself.
+```
+python 3d_nested_unet_preprocess.py --file_path /home/hyp/environment/input_bins/
+```
+On success this writes a large number of .bin files under --file_path, plus an all_shape.txt under INFERENCE_SHAPE_PATH (set to /home/hyp/environment/ in 1.3.7). That file stores some properties of the current input image and is used later to merge the output .bin results; you never need to read its contents.
+
+Note: make sure there is ample disk space. On a 310 device, following the UNET pipeline, inferring one image takes an estimated extra 200 GB to 800 GB of storage (usually around 300 GB; the upper bound depends on the raw image size, and 800 GB is an estimate) and between half an hour and two hours. There are 27 images to infer, so inferring them all at once is impossible; the only workable scheme is image by image: infer, merge the result right away, delete the used bin files, and repeat.
+
+### 2.4 Generate the info file
+Use UNetPlusPlus/pytorch/nnunet/inference/gen_dataset_info.py to read the paths of all the preprocessed .bin files (the input_bins folder from section 2.3) and generate the matching info file, which is the benchmark tool's input; name the result nnunetplusplus_prep_bin.info. The two 128 arguments are the model's input size.
+```
+python gen_dataset_info.py bin ./environment/input_bins nnunetplusplus_prep_bin.info 128 128
+```
+This also writes four extra files next to nnunetplusplus_prep_bin.info: sth1.info, sth2.info, sth3.info and sth4.info. They are an ordered, non-overlapping split of nnunetplusplus_prep_bin.info; with these split info files, four 310 devices can run inference in parallel and shorten the experiment.
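+
+Each line of the generated info file holds the sample index, the path to one input .bin, and the two shape values, space-separated (see gen_dataset_info.py). An illustrative excerpt, with hypothetical file names:
+```
+0 /home/hyp/environment/input_bins/<sub-volume-0>.bin 128 128
+1 /home/hyp/environment/input_bins/<sub-volume-1>.bin 128 128
+```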
+
+### 2.5 Run inference with the benchmark tool
+Make sure the device is idle, put the benchmark tool in the same directory as the .info files from the previous section, and use it to start one or four inference processes in parallel. -device_id selects the device, -om_path the OM model, -input_text_path the info file to use, and -output_binary=True saves the results as .bin.
+```
+source set_env.sh  # activate the NPU environment
+# option 1: use the combined nnunetplusplus_prep_bin.info and a single 310 device
+./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./environment/nnunetplusplus.om -input_text_path=nnunetplusplus_prep_bin.info -input_width=128 -input_height=128 -output_binary=True -useDvpp=False
+
+# option 2: use the four split infos and four 310 devices, e.g. in four sessions; after all runs finish, clear2345.sh must be run
+./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./environment/nnunetplusplus.om -input_text_path=sth1.info -input_width=128 -input_height=128 -output_binary=True -useDvpp=False
+./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=./environment/nnunetplusplus.om -input_text_path=sth2.info -input_width=128 -input_height=128 -output_binary=True -useDvpp=False
+./benchmark.x86_64 -model_type=vision -device_id=2 -batch_size=1 -om_path=./environment/nnunetplusplus.om -input_text_path=sth3.info -input_width=128 -input_height=128 -output_binary=True -useDvpp=False
+./benchmark.x86_64 -model_type=vision -device_id=3 -batch_size=1 -om_path=./environment/nnunetplusplus.om -input_text_path=sth4.info -input_width=128 -input_height=128 -output_binary=True -useDvpp=False
+```
+This automatically creates a result folder in the current directory, with subfolders like dumpOutput_device0 holding the inference output .bin(s) for every input .bin listed in the info file; "device0" means the run on device 0. Files named like perf_vision_batchsize_1_device_0.txt record metrics and performance of the 310 inference run.
+
+Note: this step produces a very large number of output .bin files; keep an eye on remaining disk space with df -h. If disk space gets tight mid-experiment, see the next section.
+
+### 2.6 Clean up redundant results
+For every input .bin, the benchmark tool used in the previous section writes five output .bin files, only one of which we need. The paths in the cleanup script must be adjusted: find clear2345.sh (listed at the top of this guide). It deletes the redundant 310 output .bins whose suffix is 2, 3, 4 or 5 (keeping the .bins with suffix 1), and moves all remaining .bin files into one folder (e.g. device 0's output path) so the later merge can look up sub-volume results in one place. Replace the rm arguments in the script with the correct 310 output paths, and keep the following mv commands, which move the four devices' outputs onto device 0, correct as well. The script is only used during inference; a working example:
+```
+# delete the redundant output .bin files
+rm -rf ./result/dumpOutput_device*/*_2.bin
+rm -rf ./result/dumpOutput_device*/*_3.bin
+rm -rf ./result/dumpOutput_device*/*_4.bin
+rm -rf ./result/dumpOutput_device*/*_5.bin
+
+# move the .bin results from the other folders into one directory
+mv ./result/dumpOutput_device1/* ./result/dumpOutput_device0/
+mv ./result/dumpOutput_device2/* ./result/dumpOutput_device0/
+mv ./result/dumpOutput_device3/* ./result/dumpOutput_device0/
+```
+Normally the script only needs to be set up once. Run it to delete the redundant .bin files. Also run it once after all devices have finished, to make sure all results end up in the same folder.
+```
+bash clear2345.sh
+```
+Note: clear2345.sh can run concurrently with the previous section. Check free disk space with df -h regularly and invoke the script when needed to clear the redundant suffix-2/3/4/5 output .bins; this keeps the experiment feasible on machines with modest storage. With four devices in parallel, running the script every half hour frees roughly 150-200 GB. After the previous section completes in full, invoke the script once more to move all four devices' results into dumpOutput_device0, so that folder holds the complete set of output .bin files.
+
+### 2.7 Merge the result .bin files into the final inference result
+Run 3d_nested_unet_postprocess.py with --file_path set to the directory of .bin files produced by the 310, i.e. merge the .bins under result/dumpOutput_device0/. The merged inference result is written to INFERENCE_OUTPUT_FOLDER (set to /home/hyp/environment/output/ in 1.3.7).
+```
+python 3d_nested_unet_postprocess.py --file_path /home/hyp/result/dumpOutput_device0/
+```
+
+### 2.8 Repeat the experiment
+At this point the inference result for image No. 11 is complete. Delete the files generated by the benchmark tool, i.e. result/dumpOutput_device*/, to free disk space.
+
+To reproduce the remaining results, repeat sections 2.2 through 2.8 until every validation image has been inferred.
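+
+To script the whole cycle, the sketch below strings sections 2.2-2.7 plus the cleanup together for every validation image. It is only a sketch under the path assumptions used throughout this README (single-device variant, scripts in the current directory); adjust it to your setup before use.
+```
+import os
+
+VAL_IDS = [3, 5, 11, 12, 17, 19, 24, 25, 27, 38, 40, 41, 42, 44, 51, 52,
+           58, 64, 70, 75, 77, 82, 101, 112, 115, 120, 128]
+
+for i in VAL_IDS:
+    os.system('rm /home/hyp/environment/output/liver_%d.nii.gz' % i)                                # 2.2
+    os.system('python 3d_nested_unet_preprocess.py --file_path /home/hyp/environment/input_bins/')  # 2.3
+    os.system('python gen_dataset_info.py bin /home/hyp/environment/input_bins '
+              'nnunetplusplus_prep_bin.info 128 128')                                               # 2.4
+    os.system('./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 '
+              '-om_path=/home/hyp/environment/nnunetplusplus.om '
+              '-input_text_path=nnunetplusplus_prep_bin.info '
+              '-input_width=128 -input_height=128 -output_binary=True -useDvpp=False')              # 2.5
+    os.system('bash clear2345.sh')                                                                  # 2.6
+    os.system('python 3d_nested_unet_postprocess.py --file_path ./result/dumpOutput_device0/')      # 2.7
+    os.system('rm -rf ./result/dumpOutput_device* /home/hyp/environment/input_bins/*')              # free disk space
+```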
+├── "task": "Task003_Liver" +└── "timestamp" +``` +这是在第一折交叉验证下的结果,验证集图像只有27张,本文的肝脏数据是在不同的实验仪器下采集的,图像尺寸与图像质量均存在较大差异。选用不同的交叉必然会导致不同的实验结果,但对精度达标的目标来说影响不大。 + +### 2.10 性能评测 +GPU上的性能使用onnx_infer.py来计算,需要在T4服务器上执行。您也可以在从backup/perf_T4gpu_batchsize_1.txt中直接查看性能结果。 +``` +python onnx_infer.py nnunetplusplus.onnx 1,1,128,128,128 +``` +NPU上的性能使用benchmark工具来计算,需要在310服务器上执行。使用benchmark前需要激活set_env.sh环境变量。您也可以在前面benchmark的输出文件夹result/下找到perf_vision_batchsize_1_device_0.txt文件,该文件由benchmark默认生成,在backup中我们也提供了一份实测样本,该结果与以下命令得到的结果几乎相同。 +``` +source set_env.sh +./benchmark.x86_64 -round=20 -om_path=nnunetplusplus.om -device_id=0 -batch_size=1 +``` +以下是实测结果,可供参考: +``` +NPU 310性能:ave_throughputRate = 0.235349samples/s, ave_latency = 4249.14ms +GPU T4性能:Average time spent: 2.68s +``` + +**评测结果:** +| 模型 | 官网pth精度 | GPU推理精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | :------: | +| 3D nested_unet bs1 | [Liver 1_Dice (val):95.80, Liver 2_Dice (val):65.60](https://github.com/MrGiovanni/UNetPlusPlus/tree/master/pytorch) | Liver 1_Dice (val):96.55, Liver 2_Dice (val):71.94 | Liver 1_Dice (val):96.55, Liver 2_Dice (val):71.97 | 0.3731fps | 0.9414fps | + +备注: + +1.该模型的推理过程从设计之初便不支持batchsize 2及以上,本教程全程使用了batchsize 1。 + +2.本应使用测试集进行精度验证的。但该数据集的测试集不支持单任务的精度测试,其测试集label是不公开的。因此本文只能使用数据集的验证集进行精度测试,这也导致了本文的一些实验步骤与官方不同。 diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py new file mode 100644 index 0000000000000000000000000000000000000000..a46e74d40f8dc8098a83065c29ab26a6ea59dc94 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# change_infer_path.py
+import argparse
+from nnunet.inference import infer_path
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-fp1', '--file_path1', help='INFERENCE_INPUT_FOLDER', required=True)
+    parser.add_argument('-fp2', '--file_path2', help='INFERENCE_OUTPUT_FOLDER', required=True)
+    parser.add_argument('-fp3', '--file_path3', help='INFERENCE_SHAPE_PATH', required=True)
+    args = parser.parse_args()
+    python_file = infer_path.__file__
+    fp1 = args.file_path1
+    fp2 = args.file_path2
+    fp3 = args.file_path3
+    print('reading:', python_file)
+    with open(python_file, 'r', encoding='utf-8') as file:
+        lines = file.readlines()
+    # rewrite the three path constants in place, leaving every other line untouched
+    with open(python_file, 'w', encoding='utf-8') as f:
+        for line in lines:
+            if line.startswith('INFERENCE_INPUT_FOLDER'):
+                line = "INFERENCE_INPUT_FOLDER = '" + str(fp1) + "'\n"
+            if line.startswith('INFERENCE_OUTPUT_FOLDER'):
+                line = "INFERENCE_OUTPUT_FOLDER = '" + str(fp2) + "'\n"
+            if line.startswith('INFERENCE_SHAPE_PATH'):
+                line = "INFERENCE_SHAPE_PATH = '" + str(fp3) + "'\n"
+            f.write(line)
+    print('modified:', python_file)
+    print('INFERENCE_INPUT_FOLDER =', fp1)
+    print('INFERENCE_OUTPUT_FOLDER =', fp2)
+    print('INFERENCE_SHAPE_PATH =', fp3)
+    print('done')
+
+
+if __name__ == "__main__":
+    main()
+    print('main end')
+
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/clear2345.sh b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/clear2345.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cf512f9a2ce0a227fe95fed96dd85816ec69656f
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/clear2345.sh
@@ -0,0 +1,12 @@
+# delete the redundant output .bin files
+rm -rf ./result/dumpOutput_device*/*_2.bin
+rm -rf ./result/dumpOutput_device*/*_3.bin
+rm -rf ./result/dumpOutput_device*/*_4.bin
+rm -rf ./result/dumpOutput_device*/*_5.bin
+
+# move the .bin results from the other device folders into one directory
+mv ./result/dumpOutput_device1/* ./result/dumpOutput_device0/
+mv ./result/dumpOutput_device2/* ./result/dumpOutput_device0/
+mv ./result/dumpOutput_device3/* ./result/dumpOutput_device0/
+
+echo 'clear2345.sh done'
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..68f13f27c3ec643068e0a5662b610cc59325747b
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py
@@ -0,0 +1,61 @@
+"""
+    Copyright 2020 Huawei Technologies Co., Ltd
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+    Typical usage example:
+        python gen_dataset_info.py bin ./environment/input_bins nnunetplusplus_prep_bin.info 128 128
+"""
+import os
+import sys
+from glob import glob
+
+
+def write_info(info_name, bin_images, shape):
+    """Write one 'index path width height' line per bin file."""
+    with open(info_name, 'w') as file:
+        for index, img in enumerate(bin_images):
+            file.write(' '.join([str(index), img, shape[0], shape[1]]) + '\n')
+
+
+def get_bin_info(file_path, info_name, shape, split4=True):
+    """
+    @description: get given bin information
+    @param file_path bin file path
+    @param info_name given information name
+    @param shape image shape
+    @param split4 also split the info into four subsets, one per device
+    @return
+    """
+    bin_images = glob(os.path.join(file_path, '*.bin'))
+    write_info(info_name, bin_images, shape)
+    print('number of .bin files:', len(bin_images))
+    print('info written to:', os.path.abspath(info_name))
+    if split4:  # split into four per-device info files
+        sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info']
+        step = len(bin_images) // 4
+        for i, sth in enumerate(sths):
+            start = i * step
+            end = (i + 1) * step if i < 3 else len(bin_images)
+            write_info(sth, bin_images[start:end], shape)
+        print('successfully split into four subsets:', sths)
+
+
+if __name__ == '__main__':
+    file_type = sys.argv[1]
+    file_path = sys.argv[2]
+    info_name = sys.argv[3]
+    if file_type == 'bin':
+        assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5'
+        shape = [sys.argv[4], sys.argv[5]]
+        get_bin_info(file_path, info_name, shape)
diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5307bdd0c26a0cd03c7b0b06b901239d4c101087
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch
@@ -0,0 +1,4246 @@
+diff --git a/pytorch/nnunet/evaluation/model_selection/figure_out_want_to_submit2.py b/pytorch/nnunet/evaluation/model_selection/figure_out_want_to_submit2.py
+new file mode 100644
+index 0000000..2a17e8a
+--- /dev/null
++++ b/pytorch/nnunet/evaluation/model_selection/figure_out_want_to_submit2.py
+@@ -0,0 +1,200 @@
++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++#     http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++ ++ ++from itertools import combinations ++import nnunet ++from batchgenerators.utilities.file_and_folder_operations import * ++from nnunet.evaluation.add_mean_dice_to_json import foreground_mean ++from nnunet.evaluation.model_selection.ensemble import ensemble ++from nnunet.paths import network_training_output_dir ++import numpy as np ++from subprocess import call ++from nnunet.postprocessing.consolidate_postprocessing import consolidate_folds ++from nnunet.utilities.folder_names import get_output_folder_name ++from nnunet.paths import default_cascade_trainer, default_trainer, default_plans_identifier ++ ++ ++def find_task_name(folder, task_id): ++ candidates = subdirs(folder, prefix="Task%03.0d_" % task_id, join=False) ++ assert len(candidates) > 0, "no candidate for Task id %d found in folder %s" % (task_id, folder) ++ assert len(candidates) == 1, "more than one candidate for Task id %d found in folder %s" % (task_id, folder) ++ return candidates[0] ++ ++ ++def get_mean_foreground_dice(json_file): ++ results = load_json(json_file) ++ return get_foreground_mean(results) ++ ++ ++def get_foreground_mean(results): ++ results_mean = results['results']['mean'] ++ dice_scores = [results_mean[i]['Dice'] for i in results_mean.keys() if i != "0" and i != 'mean'] ++ return np.mean(dice_scores) ++ ++ ++def main(): ++ import argparse ++ parser = argparse.ArgumentParser(usage="This is intended to identify the best model based on the five fold " ++ "cross-validation. Running this script requires all models to have been run " ++ "already. This script will summarize the results of the five folds of all " ++ "models in one json each for easy interpretability") ++ ++ parser.add_argument("-m", '--models', nargs="+", required=False, default=['3d_fullres']) ++ parser.add_argument("-t", '--task_ids', nargs="+", required=False, default='003') ++ ++ parser.add_argument("-tr", type=str, required=False, default=default_trainer, ++ help="nnUNetTrainer class. Default: %s" % default_trainer) ++ parser.add_argument("-ctr", type=str, required=False, default=default_cascade_trainer, ++ help="nnUNetTrainer class for cascade model. Default: %s" % default_cascade_trainer) ++ parser.add_argument("-pl", type=str, required=False, default=default_plans_identifier, ++ help="plans name, Default: %s" % default_plans_identifier) ++ parser.add_argument('-f', '--folds', nargs='+', default=(0, 1, 2, 3, 4), help="use this if you have non-standard folds") ++ parser.add_argument("--strict", required=False, default=True, action="store_true", ++ help="set this flag if you want this script to crash of one of the models is missing") ++ ++ args = parser.parse_args() ++ tasks = [int(i) for i in args.task_ids] ++ ++ models = args.models ++ tr = args.tr ++ trc = args.ctr ++ strict = args.strict ++ pl = args.pl ++ folds = tuple(int(i) for i in args.folds) ++ ++ validation_folder = "validation_raw" ++ ++ # this script now acts independently from the summary jsons. 
That was unnecessary ++ id_task_mapping = {} ++ # for each task, run ensembling using all combinations of two models ++ for t in tasks: ++ # first collect pure model performance (postprocessed) ++ results = {} ++ all_results = {} ++ valid_models = [] ++ for m in models: ++ try: ++ if m == "3d_cascade_fullres": ++ trainer = trc ++ else: ++ trainer = tr ++ ++ if t not in id_task_mapping.keys(): ++ task_name = find_task_name(get_output_folder_name(m), t) ++ id_task_mapping[t] = task_name ++ ++ output_folder = get_output_folder_name(m, id_task_mapping[t], trainer, pl) ++ assert isdir(output_folder), "Output folder for model %s is missing, expected: %s" % (m, output_folder) ++ ++ # we need a postprocessing_json for inference, so that must be present ++ postprocessing_json = join(output_folder, "postprocessing.json") ++ # we need cv_niftis_postprocessed to know the single model performance ++ cv_niftis_folder = join(output_folder, "cv_niftis_raw") ++ if not isfile(postprocessing_json) or not isdir(cv_niftis_folder): ++ print("running missing postprocessing for %s and model %s" % (id_task_mapping[t], m)) ++ consolidate_folds(output_folder, folds=folds) ++ assert isfile(postprocessing_json), "Postprocessing json missing, expected: %s" % postprocessing_json ++ assert isdir(cv_niftis_folder), "Folder with niftis from CV missing, expected: %s" % cv_niftis_folder ++ ++ # obtain mean foreground dice ++ summary_file = join(cv_niftis_folder, "summary.json") ++ results[m] = get_mean_foreground_dice(summary_file) ++ foreground_mean(summary_file) ++ all_results[m] = load_json(summary_file)['results']['mean'] ++ valid_models.append(m) ++ ++ except Exception as e: ++ if strict: ++ raise e ++ else: ++ print("WARNING!") ++ print(e) ++ ++ # now run ensembling and add ensembling to results ++ print("\nFound the following valid models:\n", valid_models) ++ if len(valid_models) > 1: ++ for m1, m2 in combinations(valid_models, 2): ++ ++ trainer_m1 = trc if m1 == "3d_cascade_fullres" else tr ++ trainer_m2 = trc if m2 == "3d_cascade_fullres" else tr ++ ++ ensemble_name = "ensemble_" + m1 + "__" + trainer_m1 + "__" + pl + "--" + m2 + "__" + trainer_m2 + "__" + pl ++ output_folder_base = join(network_training_output_dir, "ensembles", id_task_mapping[t], ensemble_name) ++ maybe_mkdir_p(output_folder_base) ++ ++ network1_folder = get_output_folder_name(m1, id_task_mapping[t], trainer_m1, pl) ++ network2_folder = get_output_folder_name(m2, id_task_mapping[t], trainer_m2, pl) ++ ++ print("ensembling", network1_folder, network2_folder) ++ ensemble(network1_folder, network2_folder, output_folder_base, id_task_mapping[t], validation_folder, folds) ++ # ensembling will automatically do postprocessingget_foreground_mean ++ ++ # now get result of ensemble ++ results[ensemble_name] = get_mean_foreground_dice(join(output_folder_base, "ensembled_raw", "summary.json")) ++ summary_file = join(output_folder_base, "ensembled_raw", "summary.json") ++ foreground_mean(summary_file) ++ all_results[ensemble_name] = load_json(summary_file)['results']['mean'] ++ ++ # now print all mean foreground dice and highlight the best ++ foreground_dices = list(results.values()) ++ best = np.max(foreground_dices) ++ for k, v in results.items(): ++ print(k, v) ++ ++ predict_str = "" ++ best_model = None ++ for k, v in results.items(): ++ if v == best: ++ print("%s submit model %s" % (id_task_mapping[t], k), v) ++ best_model = k ++ print("\nHere is how you should predict test cases. 
Run in sequential order and replace all input and output folder names with your personalized ones\n") ++ if k.startswith("ensemble"): ++ tmp = k[len("ensemble_"):] ++ model1, model2 = tmp.split("--") ++ m1, t1, pl1 = model1.split("__") ++ m2, t2, pl2 = model2.split("__") ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + m1 + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL2 -tr " + tr + " -ctr " + trc + " -m " + m2 + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ ++ predict_str += "nnUNet_ensemble -f OUTPUT_FOLDER_MODEL1 OUTPUT_FOLDER_MODEL2 -o OUTPUT_FOLDER -pp " + join(network_training_output_dir, "ensembles", id_task_mapping[t], k, "postprocessing.json") + "\n" ++ else: ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + k + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ print(predict_str) ++ ++ summary_folder = join(network_training_output_dir, "ensembles", id_task_mapping[t]) ++ maybe_mkdir_p(summary_folder) ++ with open(join(summary_folder, "prediction_commands.txt"), 'w') as f: ++ f.write(predict_str) ++ ++ num_classes = len([i for i in all_results[best_model].keys() if i != 'mean']) ++ with open(join(summary_folder, "summary.csv"), 'w') as f: ++ f.write("model") ++ for c in range(1, num_classes): ++ f.write(",class%d" % c) ++ f.write(",average") ++ f.write("\n") ++ for m in all_results.keys(): ++ f.write(m) ++ for c in range(1, num_classes): ++ f.write(",%01.4f" % all_results[m][str(c)]["Dice"]) ++ f.write(",%01.4f" % all_results[m]['mean']["Dice"]) ++ f.write("\n") ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py b/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py +index cf5285a..a0384f0 100644 +--- a/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py ++++ b/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py +@@ -24,14 +24,14 @@ def crawl_and_remove_hidden_from_decathlon(folder): + "labelsTr and imagesTs" + subf = subfolders(folder, join=False) + assert 'imagesTr' in subf, "This does not seem to be a decathlon folder. Please give me a " \ +- "folder that starts with TaskXX and has the subfolders imagesTr, " \ +- "labelsTr and imagesTs" ++ "folder that starts with TaskXX and has the subfolders imagesTr, " \ ++ "labelsTr and imagesTs" + assert 'imagesTs' in subf, "This does not seem to be a decathlon folder. Please give me a " \ +- "folder that starts with TaskXX and has the subfolders imagesTr, " \ +- "labelsTr and imagesTs" ++ "folder that starts with TaskXX and has the subfolders imagesTr, " \ ++ "labelsTr and imagesTs" + assert 'labelsTr' in subf, "This does not seem to be a decathlon folder. Please give me a " \ +- "folder that starts with TaskXX and has the subfolders imagesTr, " \ +- "labelsTr and imagesTs" ++ "folder that starts with TaskXX and has the subfolders imagesTr, " \ ++ "labelsTr and imagesTs" + _ = [os.remove(i) for i in subfiles(folder, prefix=".")] + _ = [os.remove(i) for i in subfiles(join(folder, 'imagesTr'), prefix=".")] + _ = [os.remove(i) for i in subfiles(join(folder, 'labelsTr'), prefix=".")] +@@ -45,9 +45,10 @@ def main(): + "therefore expect 3D niftixs instead, with one file per modality. 
" + "This utility will convert 4D MSD data into the format nnU-Net " + "expects") +- parser.add_argument("-i", help="Input folder. Must point to a TaskXX_TASKNAME folder as downloaded from the MSD " +- "website", required=True) +- parser.add_argument("-p", required=False, default=default_num_threads, type=int, ++ parser.add_argument("-i", required=False, default='/data/yupeng/Task03_Liver/', ++ help="Input folder. Must point to a TaskXX_TASKNAME folder as downloaded from the MSD " ++ "website") ++ parser.add_argument("-p", required=False, default=8, type=int, + help="Use this to specify how many processes are used to run the script. " + "Default is %d" % default_num_threads) + parser.add_argument("-output_task_id", required=False, default=None, type=int, +diff --git a/pytorch/nnunet/experiment_planning/nnUNet_plan_and_preprocess.py b/pytorch/nnunet/experiment_planning/nnUNet_plan_and_preprocess.py +index bb6785b..0b0ccd9 100644 +--- a/pytorch/nnunet/experiment_planning/nnUNet_plan_and_preprocess.py ++++ b/pytorch/nnunet/experiment_planning/nnUNet_plan_and_preprocess.py +@@ -28,10 +28,11 @@ def main(): + import argparse + + parser = argparse.ArgumentParser() +- parser.add_argument("-t", "--task_ids", nargs="+", help="List of integers belonging to the task ids you wish to run" +- " experiment planning and preprocessing for. Each of these " +- "ids must, have a matching folder 'TaskXXX_' in the raw " +- "data folder") ++ parser.add_argument("-t", "--task_ids", default="3", nargs="+", ++ help="List of integers belonging to the task ids you wish to run" ++ " experiment planning and preprocessing for. Each of these " ++ "ids must, have a matching folder 'TaskXXX_' in the raw " ++ "data folder") + parser.add_argument("-pl3d", "--planner3d", type=str, default="ExperimentPlanner3D_v21", + help="Name of the ExperimentPlanner class for the full resolution 3D U-Net and U-Net cascade. " + "Default is ExperimentPlanner3D_v21. 
Can be 'None', in which case these U-Nets will not be " +diff --git a/pytorch/nnunet/hyp_getnpz.py b/pytorch/nnunet/hyp_getnpz.py +new file mode 100644 +index 0000000..5113f93 +--- /dev/null ++++ b/pytorch/nnunet/hyp_getnpz.py +@@ -0,0 +1,36 @@ ++import numpy as np ++import os ++import nibabel as nib ++import pickle ++ ++ ++raw_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTr/liver_0_0000.nii.gz' ++crop_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_cropped_data/Task003_Liver/liver_0.npz' ++crop_data = '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetData_plans_v2.1_stage0/liver_0.npz' ++pickle_data = '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl' ++ ++print('start') ++ ++p_data = pickle.load(open(pickle_data, 'rb')) ++ ++ ++ ++c_data = np.load(crop_data) ++print(c_data.files) ++ ++r_data = nib.load(raw_data).get_data() ++r_data = r_data / np.amax(r_data) ++ ++min2 = min(r_data) ++ ++for i in range(512): ++ for j in range(512): ++ for k in range(75): ++ data1 = r_data[i][j][k] ++ data2 = c_data.f.data[0][k][i][j] ++ if data1 != data2: ++ print("wrong") ++ break ++ ++ ++print('end') +\ No newline at end of file +diff --git a/pytorch/nnunet/inference/copy_val_to_test.py b/pytorch/nnunet/inference/copy_val_to_test.py +new file mode 100644 +index 0000000..405345b +--- /dev/null ++++ b/pytorch/nnunet/inference/copy_val_to_test.py +@@ -0,0 +1,19 @@ ++import os ++import shutil ++ ++# fold = 0 ++val_folder = '/root/heyupeng/environment/Task03_Liver/imagesTr/' ++test_folder = '/root/heyupeng/environment/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/' ++val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77, ++ 82] ++print('val_list:', val_list) ++for val in val_list: ++ source_file = 'liver_' + str(val) + '.nii.gz' ++ source_path = os.path.join(val_folder, source_file) ++ target_file = 'liver_' + str(val) + '_0000.nii.gz' ++ target_path = os.path.join(test_folder, target_file) ++ print('copy: ', source_path, '->', target_path) ++ shutil.copyfile(source_path, target_path) ++print('done') ++ ++ +diff --git a/pytorch/nnunet/inference/create_testset.py b/pytorch/nnunet/inference/create_testset.py +new file mode 100644 +index 0000000..cd13c1e +--- /dev/null ++++ b/pytorch/nnunet/inference/create_testset.py +@@ -0,0 +1,28 @@ ++import os ++import pdb ++import sys ++import shutil ++ ++ ++def main(input_path): ++ if input_path is None: ++ raise Exception('Parameter need to be filled in: input_path') ++ env_dist = os.environ ++ p1 = env_dist.get('nnUNet_raw_data_base') ++ val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, ++ 77, 82] # 数据集的验证集部分 ++ p2 = 'nnUNet_raw_data/Task003_Liver/imagesTr/' ++ target_path = os.path.join(p1, p2) ++ for v in val_list: ++ file_name = 'liver_' + str(v) + '_0000.nii.gz' ++ file_path = os.path.join(target_path, file_name) ++ # pdb.set_trace() ++ print('copy file:[', file_path, '] to folder:', input_path) ++ shutil.copy(file_path, input_path) ++ print('done') ++ ++ ++ ++if __name__ == "__main__": ++ input_path = sys.argv[1] ++ main(input_path) +diff --git a/pytorch/nnunet/inference/delete_other_data.py b/pytorch/nnunet/inference/delete_other_data.py +new file mode 100644 +index 0000000..b58367f +--- /dev/null ++++ b/pytorch/nnunet/inference/delete_other_data.py +@@ -0,0 +1,30 @@ 
++import os
++import pdb
++
++
++def listdir(path, list_name):
++    for file in os.listdir(path):
++        file_path = os.path.join(path, file)
++        if os.path.isdir(file_path):
++            listdir(file_path, list_name)
++        elif os.path.splitext(file_path)[1] == '.gz':
++            list_name.append(file_path)
++    return list_name
++
++val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77,
++            82]
++target_folder = ['imagesTr', 'labelsTr', 'imagesTs']
++for i in range(len(target_folder)):
++    t = target_folder[i]
++    if i == 2:
++        val_list = [132]
++    p = os.path.join('./Task03_Liver/', t)
++    files = []
++    files = listdir(p, files)
++    files = set(files)
++    for e in val_list:
++        str_e = './Task03_Liver/' + t + '/liver_' + str(e) + '.nii.gz'
++        files.remove(str_e)
++    for f in files:
++        os.remove(f)
++print('end')
+diff --git a/pytorch/nnunet/inference/gen_dataset_info.py b/pytorch/nnunet/inference/gen_dataset_info.py
+new file mode 100644
+index 0000000..d1cb265
+--- /dev/null
++++ b/pytorch/nnunet/inference/gen_dataset_info.py
+@@ -0,0 +1,83 @@
++"""
++    Copyright 2020 Huawei Technologies Co., Ltd
++
++    Licensed under the Apache License, Version 2.0 (the "License");
++    you may not use this file except in compliance with the License.
++    You may obtain a copy of the License at
++
++    http://www.apache.org/licenses/LICENSE-2.0
++
++    Unless required by applicable law or agreed to in writing, software
++    distributed under the License is distributed on an "AS IS" BASIS,
++    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++    See the License for the specific language governing permissions and
++    limitations under the License.
++    Typical usage example:
++        python3 gen_dataset_info.py bin <bin_dir> <info_name> <height> <width>
++"""
++import os
++import sys
++from glob import glob
++import pdb
++
++
++def get_bin_info(file_path, info_name, shape, split4=True):
++    """
++    @description: get given bin information
++    @param file_path bin file path
++    @param info_name given information name
++    @param shape image shape
++    @return
++    """
++    bin_images = glob(os.path.join(file_path, '*.bin'))
++    with open(info_name, 'w') as file:
++        for index, img in enumerate(bin_images):
++            content = ' '.join([str(index), img, shape[0], shape[1]])
++            file.write(content)
++            file.write('\n')
++    print('info written to:', info_name)
++    if split4:  # optionally split the info into four shards, one per device
++        sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info']
++        for i in range(len(sths)):
++            s = sths[i]
++            s = os.path.join(info_name, '..', s)
++            sths[i] = s
++        length = len(bin_images)
++        step = length // 4
++        shards = [bin_images[0: step], bin_images[step: 2*step],
++                  bin_images[2*step: 3*step], bin_images[3*step:]]
++        for info_file, shard in zip(sths, shards):
++            with open(info_file, 'w') as file:
++                for index, img in enumerate(shard):
++                    content = ' '.join([str(index), img, shape[0], shape[1]])
++                    file.write(content)
++                    file.write('\n')
++        print('successfully split into four shards:', sths)
++
++
++if __name__ == '__main__':
++    file_type = sys.argv[1]
++    file_path = sys.argv[2]
++    info_name = sys.argv[3]
++    if file_type == 'bin':
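++        # expected invocation (sketch): python3 gen_dataset_info.py bin <bin_dir> <info_name> <height> <width>;
++        # sys.argv[4] and sys.argv[5] are the two spatial dimensions written into every info line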
++        assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5'  # check argc before indexing
++        shape1 = sys.argv[4]
++        shape2 = sys.argv[5]
++        shape = [shape1, shape2]
++        get_bin_info(file_path, info_name, shape)
++    print('end main')
+\ No newline at end of file
+diff --git a/pytorch/nnunet/inference/infer_path.py b/pytorch/nnunet/inference/infer_path.py
+new file mode 100644
+index 0000000..03ab90e
+--- /dev/null
++++ b/pytorch/nnunet/inference/infer_path.py
+@@ -0,0 +1,27 @@
++import os
++import sys
++
++# legacy settings; more functionality may be added later
++INFERENCE_BIN_INPUT_FOLDER = None
++INFERENCE_BIN_OUTPUT_FOLDER = None
++
++# settings for server 47
++# INFERENCE_INPUT_FOLDER = '/root/heyupeng2/environment/input/'  # folder holding the images to run inference on
++# INFERENCE_OUTPUT_FOLDER = '/root/heyupeng2/environment/output'  # folder that receives the inference results
++# INFERENCE_SHAPE_PATH = '/root/heyupeng2/environment/'  # directory containing the file all_shape.txt
++# INFERENCE_BIN_INPUT_FOLDER = '/root/heyupeng2/environment/bin_files/'  # directory of the input .bin files
++# INFERENCE_BIN_OUTPUT_FOLDER = '/root/heyupeng2/result/dumpOutput_device0/'  # directory of the output .bin files; this path may need to be re-confirmed later
++
++# settings for the 3090 server
++# INFERENCE_INPUT_FOLDER = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/'  # folder holding the images to run inference on
++# INFERENCE_OUTPUT_FOLDER = '/data/yupeng/environment_variables/output/'  # folder that receives the inference results
++# INFERENCE_SHAPE_PATH = '/data/yupeng/environment_variables/'  # directory containing the file all_shape.txt
++# INFERENCE_BIN_INPUT_FOLDER = '/data/yupeng/environment_variables/output/bin_file/'  # directory of the input .bin files
++# INFERENCE_BIN_OUTPUT_FOLDER = '/data/yupeng/environment_variables/output/bin_file_benchmark/real_output/'  # directory of the output .bin files; this path may need to be re-confirmed later
++
++# settings for server 241
++INFERENCE_INPUT_FOLDER = '/home/modelzoo/contrib/ACL_PyTorch/Research/cv/segmentation/3D_Nested_Unet/environment/input/'
++INFERENCE_OUTPUT_FOLDER = '/home/modelzoo/contrib/ACL_PyTorch/Research/cv/segmentation/3D_Nested_Unet/environment/output/'
++INFERENCE_SHAPE_PATH = '/home/modelzoo/contrib/ACL_PyTorch/Research/cv/segmentation/3D_Nested_Unet/environment/'
++
++
+diff --git a/pytorch/nnunet/inference/model2onnx.py b/pytorch/nnunet/inference/model2onnx.py
+new file mode 100644
+index 0000000..e69de29
+diff --git a/pytorch/nnunet/inference/predict.py b/pytorch/nnunet/inference/predict.py
+index fdb43bc..bf140cb 100644
+--- a/pytorch/nnunet/inference/predict.py
++++ b/pytorch/nnunet/inference/predict.py
+@@ -177,8 +177,15 @@ def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, num_t
+ 
+     print("emptying cuda cache")
+     torch.cuda.empty_cache()
+-
+-    print("loading parameters for folds,", folds)
++    '''
++    model='/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1'
++    folds=None
++    mixed_precision=True
++    checkpoint_name='model_final_checkpoint'
++    trainer=class-nnUNetPlusPlusTrainerV2
++    params=list 5 -> dict 6 -> epoch state_dict optimizer_state_dict lr_scheduler_state_dict plot_stuff amp_grad_scaler
++    '''
++    print("loading parameters for folds,", folds)  # the parameters are fetched here; they are not loaded into the model yet
+     trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name)
+ 
+     if segmentation_export_kwargs is None:
+@@ -202,6 +209,7 @@ def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, num_t
+     all_output_files = []
+     for preprocessed in preprocessing:
+         output_filename, (d, dct) = preprocessed
++        print('output_filename, d, dct = ', output_filename, d, dct)
+         all_output_files.append(all_output_files)
+         if
 isinstance(d, str):
+             data = np.load(d)
+@@ -211,10 +219,19 @@
+         print("predicting", output_filename)
+         softmax = []
+         for p in params:
++            print("len(p)=", len(p))
+             trainer.load_checkpoint_ram(p, False)
+             softmax.append(trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, trainer.data_aug_params[
+                 'mirror_axes'], True, step_size=step_size, use_gaussian=True, all_in_gpu=all_in_gpu,
+                 mixed_precision=mixed_precision)[1][None])
+ 
+         softmax = np.vstack(softmax)
+         softmax_mean = np.mean(softmax, 0)
+diff --git a/pytorch/nnunet/inference/predict2.py b/pytorch/nnunet/inference/predict2.py
+new file mode 100644
+index 0000000..263dbd2
+--- /dev/null
++++ b/pytorch/nnunet/inference/predict2.py
+@@ -0,0 +1,845 @@
++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++#     http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++
++
++import argparse
++from copy import deepcopy
++from typing import Tuple, Union, List
++
++import numpy as np
++from batchgenerators.augmentations.utils import resize_segmentation
++from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax, save_segmentation_nifti
++from batchgenerators.utilities.file_and_folder_operations import *
++from multiprocessing import Process, Queue
++import torch
++import SimpleITK as sitk
++import shutil
++from multiprocessing import Pool
++from nnunet.postprocessing.connected_components import load_remove_save, load_postprocessing
++from nnunet.training.model_restore import load_model_and_checkpoint_files
++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer
++from nnunet.utilities.one_hot_encoding import to_one_hot
++from nnunet.utilities.to_torch import maybe_to_torch, to_cuda
++import pdb
++
++
++def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files, segs_from_prev_stage, classes,
++                             transpose_forward):
++    # suppress output
++    # sys.stdout = open(os.devnull, 'w')
++
++    errors_in = []
++    for i, l in enumerate(list_of_lists):
++        try:
++            output_file = output_files[i]
++            print("preprocessing", output_file)
++            d, _, dct = preprocess_fn(l)
++            # print(output_file, dct)
++            if segs_from_prev_stage[i] is not None:
++                assert isfile(segs_from_prev_stage[i]) and segs_from_prev_stage[i].endswith(
++                    ".nii.gz"), "segs_from_prev_stage" \
++                                " must point to a " \
++                                "segmentation file"
++                seg_prev = sitk.GetArrayFromImage(sitk.ReadImage(segs_from_prev_stage[i]))
++                # check to see if shapes match
++                img = sitk.GetArrayFromImage(sitk.ReadImage(l[0]))
++                assert all([i == j for i, j in zip(seg_prev.shape, img.shape)]), "image and segmentation from previous " \
++                                                                                 "stage don't have the same pixel array " \
++                                                                                 "shape! 
image: %s, seg_prev: %s" % \
++                                                                                 (l[0], segs_from_prev_stage[i])
++                seg_prev = seg_prev.transpose(transpose_forward)
++                seg_reshaped = resize_segmentation(seg_prev, d.shape[1:], order=1, cval=0)
++                seg_reshaped = to_one_hot(seg_reshaped, classes)
++                d = np.vstack((d, seg_reshaped)).astype(np.float32)
++            """There is a problem with python process communication that prevents us from communicating objects
++            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
++            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long
++            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
++            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
++            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
++            filename or np.ndarray and will handle this automatically"""
++            print(d.shape)
++            if np.prod(d.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe, 4 because float32 is 4 bytes
++                print(
++                    "This output is too large for python process-process communication. "
++                    "Saving output temporarily to disk")
++                np.save(output_file[:-7] + ".npy", d)
++                d = output_file[:-7] + ".npy"
++            q.put((output_file, (d, dct)))
++        except KeyboardInterrupt:
++            raise KeyboardInterrupt
++        except Exception as e:
++            print("error in", l)
++            print(e)
++    q.put("end")
++    if len(errors_in) > 0:
++        print("There were some errors in the following cases:", errors_in)
++        print("These cases were ignored.")
++    else:
++        print("This worker has ended successfully, no errors to report")
++    # restore output
++    # sys.stdout = sys.__stdout__
++
++
++def preprocess_multithreaded(trainer, list_of_lists, output_files, num_processes=2, segs_from_prev_stage=None):
++    if segs_from_prev_stage is None:
++        segs_from_prev_stage = [None] * len(list_of_lists)
++
++    num_processes = min(len(list_of_lists), num_processes)
++
++    classes = list(range(1, trainer.num_classes))
++    assert isinstance(trainer, nnUNetTrainer)
++    q = Queue(1)
++    processes = []
++    for i in range(num_processes):
++        pr = Process(target=preprocess_save_to_queue, args=(trainer.preprocess_patient, q,
++                                                            list_of_lists[i::num_processes],
++                                                            output_files[i::num_processes],
++                                                            segs_from_prev_stage[i::num_processes],
++                                                            classes, trainer.plans['transpose_forward']))
++        pr.start()
++        processes.append(pr)
++
++    try:
++        end_ctr = 0
++        while end_ctr != num_processes:
++            item = q.get()
++            if item == "end":
++                end_ctr += 1
++                continue
++            else:
++                yield item
++
++    finally:
++        for p in processes:
++            if p.is_alive():
++                p.terminate()  # this should not happen but better safe than sorry right
++            p.join()
++
++        q.close()
++
++
++def pth2onnx(model, output_file=r'/home/yupeng/HUAWEI/UNetPlusPlus/pytorch/nnunet/run/nnunetplusplus.onnx'):
++    # put the model into eval mode
++    model.eval()
++    # input node name
++    input_names = ["image"]
++    # output node name
++    output_names = ["class"]
++    dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}}
++    dummy_input = torch.randn(1, 1, 128, 128, 128)
++    # dummy_input = to_cuda(dummy_input)
++    # verbose=True prints the ONNX nodes together with the PyTorch code lines they come from
++    torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes,
++                      output_names=output_names, opset_version=11, verbose=True)
++
++
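++# Editor's note: a minimal, illustrative sanity check for the exported graph, assuming
++# the optional onnx package is available (it is not a dependency of this repo):
++#
++#     import onnx
++#     m = onnx.load('nnunetplusplus.onnx')       # or whatever path was passed as output_file
++#     onnx.checker.check_model(m)                # structural validation
++#     print([i.name for i in m.graph.input])     # expect ['image']
++#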
++def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, num_threads_preprocessing,
++                  num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, overwrite_existing=False,
++                  all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint",
++                  segmentation_export_kwargs: dict = None, pre_mode=None, fp=None):
++    """
++    :param segmentation_export_kwargs:
++    :param model: folder where the model is saved, must contain fold_x subfolders
++    :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...]
++    :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...]
++    :param folds: default: (0, 1, 2, 3, 4) (but can also be 'all' or a subset of the five folds, for example use (0, )
++    for using only fold_0
++    :param save_npz: default: False
++    :param num_threads_preprocessing:
++    :param num_threads_nifti_save:
++    :param segs_from_prev_stage:
++    :param do_tta: default: True, can be set to False for an 8x speedup at the cost of a reduced segmentation quality
++    :param overwrite_existing: default: False
++    :param mixed_precision: if None then we take no action. If True/False we overwrite what the model has in its init
++    :return:
++    """
++    assert len(list_of_lists) == len(output_filenames)
++    if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames)
++
++    pool = Pool(num_threads_nifti_save)
++    results = []
++
++    cleaned_output_files = []
++    for o in output_filenames:
++        dr, f = os.path.split(o)
++        if len(dr) > 0:
++            maybe_mkdir_p(dr)
++        if not f.endswith(".nii.gz"):
++            f, _ = os.path.splitext(f)
++            f = f + ".nii.gz"
++        cleaned_output_files.append(join(dr, f))
++
++    if not overwrite_existing:
++        print("number of cases:", len(list_of_lists))
++        not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)]
++
++        cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx]
++        list_of_lists = [list_of_lists[i] for i in not_done_idx]
++        if segs_from_prev_stage is not None:
++            segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx]
++
++        print("number of cases that still need to be predicted:", len(cleaned_output_files))
++
++    print("emptying cuda cache")
++    torch.cuda.empty_cache()
++    '''
++    model='/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1'
++    folds=None
++    mixed_precision=True
++    checkpoint_name='model_final_checkpoint'
++    trainer=class-nnUNetPlusPlusTrainerV2
++    params=list 5 -> dict 6 -> epoch state_dict optimizer_state_dict lr_scheduler_state_dict plot_stuff amp_grad_scaler
++    '''
++    print("loading parameters for folds,", folds)  # the parameters are fetched here; they are not loaded into the model yet
++    trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name)
++
++    if segmentation_export_kwargs is None:
++        if 'segmentation_export_params' in trainer.plans.keys():
++            force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z']
++            interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order']
++            interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z']
++        else:  # this is the branch taken here
++            force_separate_z = None
++            interpolation_order = 1
++            interpolation_order_z = 0
++    else:
++        force_separate_z = segmentation_export_kwargs['force_separate_z']
++        interpolation_order = segmentation_export_kwargs['interpolation_order']
++        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']
++
++    print("starting preprocessing generator")
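++    # Editor's note: preprocess_multithreaded is a generator; each item it yields is
++    # (output_filename, (d, dct)), where d is either the preprocessed array itself or,
++    # for cases exceeding the ~2 GB pickle limit described in preprocess_save_to_queue,
++    # the path of a temporary .npy file that is reloaded (and deleted) below.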
++    preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing,
++                                             segs_from_prev_stage)
++    # unet++V2class, [['/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/liver_132_0000.nii.gz']]
++    # ['/data/yupeng/environment_variables/output/liver_132.nii.gz'], 6, None
++    print("starting prediction...")
++    if int(pre_mode) == -1:
++        p = params[0]
++        trainer.load_checkpoint_ram(p, False)  # nnUNetPlusPlusTrainerV2; the actual implementation lives in network_trainer
++        print('pth2onnx start')
++        pth2onnx(trainer.network, fp)
++        print('pth2onnx end')
++        print('the ONNX model has been written to:', fp)
++        import sys
++        sys.exit(0)
++    all_output_files = []
++    for preprocessed in preprocessing:
++        output_filename, (d, dct) = preprocessed
++        print('output_filename, d, dct = ', output_filename, d, dct)
++        all_output_files.append(output_filename)  # was append(all_output_files), which appended the list to itself
++        if isinstance(d, str):
++            data = np.load(d)
++            os.remove(d)
++            d = data
++        print("predicting", output_filename)
++        softmax = []
++        params = [params[0]]  # only run inference with the first model (first fold)
++        for p in params:
++            # trainer.load_checkpoint_ram(p, False)  # nnUNetPlusPlusTrainerV2; the actual implementation lives in network_trainer
++            # output_filename = '/data/yupeng/environment_variables/output/liver_132.nii.gz'
++            ttttt = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, trainer.data_aug_params[
++                'mirror_axes'], True, step_size=step_size, use_gaussian=True, all_in_gpu=all_in_gpu,
++                mixed_precision=mixed_precision, img_name=output_filename, pre_mode=pre_mode, fp=fp)  # tuple(ndarray 489 500 500; 3 489 500 500)
++            softmax.append(ttttt[1][None])  # adds a leading axis -> 1 3 489 500 500
++        # softmax is a list whose elements are ndarrays of shape 1 3 489 500 500
++        softmax = np.vstack(softmax)  # 5 3 489 500 500 when all five folds are used
++        softmax_mean = np.mean(softmax, 0)  # 3 489 500 500
++
++        transpose_forward = trainer.plans.get('transpose_forward')  # [0,1,2]
++        if transpose_forward is not None:
++            transpose_backward = trainer.plans.get('transpose_backward')
++            softmax_mean = softmax_mean.transpose([0] + [i + 1 for i in transpose_backward])
++
++        if save_npz:  # False
++            npz_file = output_filename[:-7] + ".npz"
++        else:
++            npz_file = None
++
++        if hasattr(trainer, 'regions_class_order'):  # False
++            region_class_order = trainer.regions_class_order
++        else:
++            region_class_order = None
++
++        """There is a problem with python process communication that prevents us from communicating objects
++        larger than 2 GB between processes (basically when the length of the pickle string that will be sent is
++        communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long
++        enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually
++        patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will
++        then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either
++        filename or np.ndarray and will handle this automatically"""
++        bytes_per_voxel = 4
++        if all_in_gpu:
++            bytes_per_voxel = 2  # if all_in_gpu then the return value is half (float16)
++        if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85):  # * 0.85 just to be safe
++            print(
++                "This output is too large for python process-process communication. 
Saving output temporarily to disk") ++ np.save(output_filename[:-7] + ".npy", softmax_mean) ++ softmax_mean = output_filename[:-7] + ".npy" ++ ++ results.append(pool.starmap_async(save_segmentation_nifti_from_softmax, ++ ((softmax_mean, output_filename, dct, interpolation_order, region_class_order, ++ None, None, ++ npz_file, None, force_separate_z, interpolation_order_z),) ++ )) ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") # '/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/postprocessing.json' ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.abspath(os.path.dirname(output_filenames[0]))) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++def predict_cases_fast(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, ++ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, ++ overwrite_existing=False, ++ all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint", ++ segmentation_export_kwargs: dict = None): ++ assert len(list_of_lists) == len(output_filenames) ++ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) ++ ++ pool = Pool(num_threads_nifti_save) ++ results = [] ++ ++ cleaned_output_files = [] ++ for o in output_filenames: ++ dr, f = os.path.split(o) ++ if len(dr) > 0: ++ maybe_mkdir_p(dr) ++ if not f.endswith(".nii.gz"): ++ f, _ = os.path.splitext(f) ++ f = f + ".nii.gz" ++ cleaned_output_files.append(join(dr, f)) ++ ++ if not overwrite_existing: ++ print("number of cases:", len(list_of_lists)) ++ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] ++ ++ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] ++ list_of_lists = [list_of_lists[i] for i in not_done_idx] ++ if segs_from_prev_stage is not None: ++ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] ++ ++ print("number of cases that still need to be predicted:", len(cleaned_output_files)) ++ ++ print("emptying cuda cache") ++ torch.cuda.empty_cache() ++ ++ print("loading parameters for folds,", folds) ++ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) ++ ++ if segmentation_export_kwargs is None: ++ if 'segmentation_export_params' in trainer.plans.keys(): ++ force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z'] ++ interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order'] ++ interpolation_order_z = 
trainer.plans['segmentation_export_params']['interpolation_order_z'] ++ else: ++ force_separate_z = None ++ interpolation_order = 1 ++ interpolation_order_z = 0 ++ else: ++ force_separate_z = segmentation_export_kwargs['force_separate_z'] ++ interpolation_order = segmentation_export_kwargs['interpolation_order'] ++ interpolation_order_z = segmentation_export_kwargs['interpolation_order_z'] ++ ++ print("starting preprocessing generator") ++ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, ++ segs_from_prev_stage) ++ ++ print("starting prediction...") ++ for preprocessed in preprocessing: ++ print("getting data from preprocessor") ++ output_filename, (d, dct) = preprocessed ++ print("got something") ++ if isinstance(d, str): ++ print("what I got is a string, so I need to load a file") ++ data = np.load(d) ++ os.remove(d) ++ d = data ++ ++ # preallocate the output arrays ++ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) ++ softmax_aggr = None # np.zeros((trainer.num_classes, *d.shape[1:]), dtype=np.float16) ++ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) ++ print("predicting", output_filename) ++ ++ for i, p in enumerate(params): ++ trainer.load_checkpoint_ram(p, False) ++ ++ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, ++ trainer.data_aug_params['mirror_axes'], True, ++ step_size=step_size, use_gaussian=True, ++ all_in_gpu=all_in_gpu, ++ mixed_precision=mixed_precision) ++ ++ if len(params) > 1: ++ # otherwise we dont need this and we can save ourselves the time it takes to copy that ++ print("aggregating softmax") ++ if softmax_aggr is None: ++ softmax_aggr = res[1] ++ else: ++ softmax_aggr += res[1] ++ all_seg_outputs[i] = res[0] ++ ++ print("obtaining segmentation map") ++ if len(params) > 1: ++ # we dont need to normalize the softmax by 1 / len(params) because this would not change the outcome of the argmax ++ seg = softmax_aggr.argmax(0) ++ else: ++ seg = all_seg_outputs[0] ++ ++ print("applying transpose_backward") ++ transpose_forward = trainer.plans.get('transpose_forward') ++ if transpose_forward is not None: ++ transpose_backward = trainer.plans.get('transpose_backward') ++ seg = seg.transpose([i for i in transpose_backward]) ++ ++ print("initializing segmentation export") ++ results.append(pool.starmap_async(save_segmentation_nifti, ++ ((seg, output_filename, dct, interpolation_order, force_separate_z, ++ interpolation_order_z),) ++ )) ++ print("done") ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. 
Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++ ++def predict_cases_fastest(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, ++ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, ++ overwrite_existing=False, all_in_gpu=True, step_size=0.5, ++ checkpoint_name="model_final_checkpoint"): ++ assert len(list_of_lists) == len(output_filenames) ++ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) ++ ++ pool = Pool(num_threads_nifti_save) ++ results = [] ++ ++ cleaned_output_files = [] ++ for o in output_filenames: ++ dr, f = os.path.split(o) ++ if len(dr) > 0: ++ maybe_mkdir_p(dr) ++ if not f.endswith(".nii.gz"): ++ f, _ = os.path.splitext(f) ++ f = f + ".nii.gz" ++ cleaned_output_files.append(join(dr, f)) ++ ++ if not overwrite_existing: ++ print("number of cases:", len(list_of_lists)) ++ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] ++ ++ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] ++ list_of_lists = [list_of_lists[i] for i in not_done_idx] ++ if segs_from_prev_stage is not None: ++ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] ++ ++ print("number of cases that still need to be predicted:", len(cleaned_output_files)) ++ ++ print("emptying cuda cache") ++ torch.cuda.empty_cache() ++ ++ print("loading parameters for folds,", folds) ++ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) ++ ++ print("starting preprocessing generator") ++ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, ++ segs_from_prev_stage) ++ ++ print("starting prediction...") ++ for preprocessed in preprocessing: ++ print("getting data from preprocessor") ++ output_filename, (d, dct) = preprocessed ++ print("got something") ++ if isinstance(d, str): ++ print("what I got is a string, so I need to load a file") ++ data = np.load(d) ++ os.remove(d) ++ d = data ++ ++ # preallocate the output arrays ++ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) ++ all_softmax_outputs = np.zeros((len(params), trainer.num_classes, *d.shape[1:]), dtype=np.float16) ++ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) ++ print("predicting", output_filename) ++ ++ for i, p in enumerate(params): ++ trainer.load_checkpoint_ram(p, False) ++ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, ++ trainer.data_aug_params['mirror_axes'], True, ++ step_size=step_size, use_gaussian=True, ++ all_in_gpu=all_in_gpu, ++ mixed_precision=mixed_precision) ++ if len(params) > 1: ++ # otherwise we dont need this and we can save ourselves the time it takes to copy that ++ all_softmax_outputs[i] = res[1] ++ all_seg_outputs[i] = res[0] ++ ++ print("aggregating predictions") ++ if len(params) > 1: ++ softmax_mean = np.mean(all_softmax_outputs, 0) ++ seg = softmax_mean.argmax(0) ++ else: ++ seg = all_seg_outputs[0] ++ ++ print("applying transpose_backward") ++ transpose_forward = trainer.plans.get('transpose_forward') ++ if transpose_forward is not None: ++ transpose_backward = trainer.plans.get('transpose_backward') ++ seg = seg.transpose([i for i in transpose_backward]) ++ ++ print("initializing segmentation export") ++ 
results.append(pool.starmap_async(save_segmentation_nifti, ++ ((seg, output_filename, dct, 0, None),) ++ )) ++ print("done") ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++ ++def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities): ++ print("This model expects %d input modalities for each image" % expected_num_modalities) ++ files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) ++ ++ maybe_case_ids = np.unique([i[:-12] for i in files]) ++ ++ remaining = deepcopy(files) ++ missing = [] ++ ++ assert len(files) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)" ++ ++ # now check if all required files are present and that no unexpected files are remaining ++ for c in maybe_case_ids: ++ for n in range(expected_num_modalities): ++ expected_output_file = c + "_%04.0d.nii.gz" % n ++ if not isfile(join(input_folder, expected_output_file)): ++ missing.append(expected_output_file) ++ else: ++ remaining.remove(expected_output_file) ++ ++ print("Found %d unique case ids, here are some examples:" % len(maybe_case_ids), ++ np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10))) ++ print("If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc") ++ ++ if len(remaining) > 0: ++ print("found %d unexpected remaining files in the folder. 
Here are some examples:" % len(remaining), ++ np.random.choice(remaining, min(len(remaining), 10))) ++ ++ if len(missing) > 0: ++ print("Some files are missing:") ++ print(missing) ++ raise RuntimeError("missing files in input_folder") ++ ++ return maybe_case_ids ++ ++ ++def predict_from_folder(model: str, input_folder: str, output_folder: str, folds: Union[Tuple[int], List[int]], ++ save_npz: bool, num_threads_preprocessing: int, num_threads_nifti_save: int, ++ lowres_segmentations: Union[str, None], ++ part_id: int, num_parts: int, tta: bool, mixed_precision: bool = True, ++ overwrite_existing: bool = True, mode: str = 'normal', overwrite_all_in_gpu: bool = None, ++ step_size: float = 0.5, checkpoint_name: str = "model_final_checkpoint", ++ segmentation_export_kwargs: dict = None, pre_mode=None, fp=None): ++ """ ++ here we use the standard naming scheme to generate list_of_lists and output_files needed by predict_cases ++ ++ :param model: ++ :param input_folder: ++ :param output_folder: ++ :param folds: ++ :param save_npz: ++ :param num_threads_preprocessing: ++ :param num_threads_nifti_save: ++ :param lowres_segmentations: ++ :param part_id: ++ :param num_parts: ++ :param tta: ++ :param mixed_precision: ++ :param overwrite_existing: if not None then it will be overwritten with whatever is in there. None is default (no overwrite) ++ :return: ++ """ ++ maybe_mkdir_p(output_folder) ++ shutil.copy(join(model, 'plans.pkl'), output_folder) ++ ++ assert isfile(join(model, "plans.pkl")), "Folder with saved model weights must contain a plans.pkl file" ++ expected_num_modalities = load_pickle(join(model, "plans.pkl"))['num_modalities'] ++ ++ # check input folder integrity ++ case_ids = check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities) ++ ++ output_files = [join(output_folder, i + ".nii.gz") for i in case_ids] ++ all_files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) ++ list_of_lists = [[join(input_folder, i) for i in all_files if i[:len(j)].startswith(j) and ++ len(i) == (len(j) + 12)] for j in case_ids] ++ ++ if lowres_segmentations is not None: ++ assert isdir(lowres_segmentations), "if lowres_segmentations is not None then it must point to a directory" ++ lowres_segmentations = [join(lowres_segmentations, i + ".nii.gz") for i in case_ids] ++ assert all([isfile(i) for i in lowres_segmentations]), "not all lowres_segmentations files are present. 
" \ ++ "(I was searching for case_id.nii.gz in that folder)" ++ lowres_segmentations = lowres_segmentations[part_id::num_parts] ++ else: ++ lowres_segmentations = None ++ ++ if mode == "normal": # step this ++ if overwrite_all_in_gpu is None: # True ++ all_in_gpu = False ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ return predict_cases(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ save_npz, num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, tta, ++ mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name, ++ segmentation_export_kwargs=segmentation_export_kwargs, pre_mode=pre_mode, fp=fp) ++ elif mode == "fast": ++ if overwrite_all_in_gpu is None: ++ all_in_gpu = True ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ assert save_npz is False ++ return predict_cases_fast(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, ++ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name, ++ segmentation_export_kwargs=segmentation_export_kwargs) ++ elif mode == "fastest": ++ if overwrite_all_in_gpu is None: ++ all_in_gpu = True ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ assert save_npz is False ++ return predict_cases_fastest(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, ++ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name) ++ else: ++ raise ValueError("unrecognized mode. Must be normal, fast or fastest") ++ ++ ++if __name__ == "__main__": ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" ++ " order (same as training). Files must be named " ++ "CASENAME_XXXX.nii.gz where XXXX is the modality " ++ "identifier (0000, 0001, etc)", required=True) ++ parser.add_argument('-o', "--output_folder", required=True, help="folder for saving predictions") ++ parser.add_argument('-m', '--model_output_folder', ++ help='model output folder. Will automatically discover the folds ' ++ 'that were ' ++ 'run and use those as an ensemble', required=True) ++ parser.add_argument('-f', '--folds', nargs='+', default='None', help="folds to use for prediction. Default is None " ++ "which means that folds will be detected " ++ "automatically in the model output folder") ++ parser.add_argument('-z', '--save_npz', required=False, action='store_true', help="use this if you want to ensemble" ++ " these predictions with those of" ++ " other models. Softmax " ++ "probabilities will be saved as " ++ "compresed numpy arrays in " ++ "output_folder and can be merged " ++ "between output_folders with " ++ "merge_predictions.py") ++ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', help="if model is the highres " ++ "stage of the cascade then you need to use -l to specify where the segmentations of the " ++ "corresponding lowres unet are. 
Here they are required to do a prediction") ++ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (for example via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ parser.add_argument("--num_parts", type=int, required=False, default=1, ++ help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help= ++ "Determines many background processes will be used for data preprocessing. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 6") ++ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help= ++ "Determines many background processes will be used for segmentation export. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 2") ++ parser.add_argument("--tta", required=False, type=int, default=1, help="Set to 0 to disable test time data " ++ "augmentation (speedup of factor " ++ "4(2D)/8(3D)), " ++ "lower quality segmentations") ++ parser.add_argument("--overwrite_existing", required=False, type=int, default=1, help="Set this to 0 if you need " ++ "to resume a previous " ++ "prediction. Default: 1 " ++ "(=existing segmentations " ++ "in output_folder will be " ++ "overwritten)") ++ parser.add_argument("--mode", type=str, default="normal", required=False) ++ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True") ++ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations, has no effect if mode=fastest") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z is z is done differently") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest") ++ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False, ++ help='Predictions are done with mixed precision by default. This improves speed and reduces ' ++ 'the required vram. If you want to disable mixed precision you can set this flag. 
Note ' ++ 'that yhis is not recommended (mixed precision is ~2x faster!)') ++ ++ args = parser.parse_args() ++ input_folder = args.input_folder ++ output_folder = args.output_folder ++ part_id = args.part_id ++ num_parts = args.num_parts ++ model = args.model_output_folder ++ folds = args.folds ++ save_npz = args.save_npz ++ lowres_segmentations = args.lowres_segmentations ++ num_threads_preprocessing = args.num_threads_preprocessing ++ num_threads_nifti_save = args.num_threads_nifti_save ++ tta = args.tta ++ step_size = args.step_size ++ ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ overwrite = args.overwrite_existing ++ mode = args.mode ++ all_in_gpu = args.all_in_gpu ++ ++ if lowres_segmentations == "None": ++ lowres_segmentations = None ++ ++ if isinstance(folds, list): ++ if folds[0] == 'all' and len(folds) == 1: ++ pass ++ else: ++ folds = [int(i) for i in folds] ++ elif folds == "None": ++ folds = None ++ else: ++ raise ValueError("Unexpected value for argument folds") ++ ++ if tta == 0: ++ tta = False ++ elif tta == 1: ++ tta = True ++ else: ++ raise ValueError("Unexpected value for tta, Use 1 or 0") ++ ++ if overwrite == 0: ++ overwrite = False ++ elif overwrite == 1: ++ overwrite = True ++ else: ++ raise ValueError("Unexpected value for overwrite, Use 1 or 0") ++ ++ assert all_in_gpu in ['None', 'False', 'True'] ++ if all_in_gpu == "None": ++ all_in_gpu = None ++ elif all_in_gpu == "True": ++ all_in_gpu = True ++ elif all_in_gpu == "False": ++ all_in_gpu = False ++ ++ predict_from_folder(model, input_folder, output_folder, folds, save_npz, num_threads_preprocessing, ++ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, tta, mixed_precision=not args.disable_mixed_precision, ++ overwrite_existing=overwrite, mode=mode, overwrite_all_in_gpu=all_in_gpu, step_size=step_size) +diff --git a/pytorch/nnunet/inference/predict_simple2.py b/pytorch/nnunet/inference/predict_simple2.py +new file mode 100644 +index 0000000..2af423e +--- /dev/null ++++ b/pytorch/nnunet/inference/predict_simple2.py +@@ -0,0 +1,238 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
++ ++ ++import argparse ++import torch ++ ++from nnunet.inference.predict2 import predict_from_folder ++from nnunet.paths import default_plans_identifier, network_training_output_dir, default_cascade_trainer, default_trainer ++from batchgenerators.utilities.file_and_folder_operations import join, isdir ++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++from nnunet.inference.infer_path import INFERENCE_INPUT_FOLDER, INFERENCE_OUTPUT_FOLDER ++ ++ ++def main(): ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" ++ " order (same as training). Files must be named " ++ "CASENAME_XXXX.nii.gz where XXXX is the modality " ++ "identifier (0000, 0001, etc)", required=False, ++ default=INFERENCE_INPUT_FOLDER) ++ parser.add_argument('-o', "--output_folder", required=False, ++ default=INFERENCE_OUTPUT_FOLDER, help="folder for saving predictions") ++ parser.add_argument('-t', '--task_name', help='task name or task ID, required.', ++ default="Task003_Liver", required=False) ++ parser.add_argument('-pm', '--pre_mode', help='predict mode', required=False, default=-1) ++ parser.add_argument('-fp', '--file_path', help='input or output file path for npu bin files', required=True) ++ parser.add_argument('-tr', '--trainer_class_name', ++ help='Name of the nnUNetTrainer used for 2D U-Net, full resolution 3D U-Net and low resolution ' ++ 'U-Net. The default is %s. If you are running inference with the cascade and the folder ' ++ 'pointed to by --lowres_segmentations does not contain the segmentation maps generated by ' ++ 'the low resolution U-Net then the low resolution segmentation maps will be automatically ' ++ 'generated. For this case, make sure to set the trainer class here that matches your ' ++ '--cascade_trainer_class_name (this part can be ignored if defaults are used).' ++ % default_trainer, ++ required=False, ++ default="nnUNetPlusPlusTrainerV2") ++ parser.add_argument('-ctr', '--cascade_trainer_class_name', ++ help="Trainer class name used for predicting the 3D full resolution U-Net part of the cascade." ++ "Default is %s" % default_cascade_trainer, required=False, ++ default=default_cascade_trainer) ++ ++ parser.add_argument('-m', '--model', help="2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres. Default: 3d_fullres", ++ default="3d_fullres", required=False) ++ ++ parser.add_argument('-p', '--plans_identifier', help='do not touch this unless you know what you are doing', ++ default=default_plans_identifier, required=False) ++ ++ parser.add_argument('-f', '--folds', nargs='+', default="None", ++ help="folds to use for prediction. Default is None which means that folds will be detected " ++ "automatically in the model output folder") ++ ++ parser.add_argument('-z', '--save_npz', required=False, action='store_true', ++ help="use this if you want to ensemble these predictions with those of other models. Softmax " ++ "probabilities will be saved as compressed numpy arrays in output_folder and can be " ++ "merged between output_folders with nnUNet_ensemble_predictions") ++ ++ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', ++ help="if model is the highres stage of the cascade then you can use this folder to provide " ++ "predictions from the low resolution 3D U-Net. 
If this is left at default, the " ++ "predictions will be generated automatically (provided that the 3D low resolution U-Net " ++ "network weights are present") ++ ++ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (for example via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ ++ parser.add_argument("--num_parts", type=int, required=False, default=1, ++ help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ ++ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help= ++ "Determines many background processes will be used for data preprocessing. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 6") ++ ++ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help= ++ "Determines many background processes will be used for segmentation export. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 2") ++ ++ parser.add_argument("--disable_tta", required=False, default=False, action="store_true", ++ help="set this flag to disable test time data augmentation via mirroring. Speeds up inference " ++ "by roughly factor 4 (2D) or 8 (3D)") ++ ++ parser.add_argument("--overwrite_existing", required=False, default=False, action="store_true", ++ help="Set this flag if the target folder contains predictions that you would like to overwrite") ++ ++ parser.add_argument("--mode", type=str, default="normal", required=False, help="Hands off!") ++ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True. " ++ "Do not touch.") ++ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations, has no effect if mode=fastest. Do not touch this.") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z is z is done differently. Do not touch this.") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest. " ++ # "Do not touch this.") ++ parser.add_argument('-chk', ++ help='checkpoint name, default: model_final_checkpoint', ++ required=False, ++ default='model_final_checkpoint') ++ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False, ++ help='Predictions are done with mixed precision by default. This improves speed and reduces ' ++ 'the required vram. If you want to disable mixed precision you can set this flag. 
Note ' ++ 'that yhis is not recommended (mixed precision is ~2x faster!)') ++ ++ args = parser.parse_args() ++ print(args) ++ ++ input_folder = args.input_folder ++ output_folder = args.output_folder ++ part_id = args.part_id ++ # 推理模式 ++ pre_mode = args.pre_mode ++ fp = args.file_path ++ num_parts = args.num_parts ++ folds = args.folds ++ save_npz = args.save_npz ++ lowres_segmentations = args.lowres_segmentations ++ num_threads_preprocessing = args.num_threads_preprocessing ++ num_threads_nifti_save = args.num_threads_nifti_save ++ disable_tta = args.disable_tta ++ step_size = args.step_size ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ overwrite_existing = args.overwrite_existing ++ mode = args.mode ++ all_in_gpu = args.all_in_gpu ++ model = args.model ++ trainer_class_name = args.trainer_class_name ++ cascade_trainer_class_name = args.cascade_trainer_class_name ++ ++ task_name = args.task_name ++ ++ if not task_name.startswith("Task"): ++ task_id = int(task_name) ++ task_name = convert_id_to_task_name(task_id) ++ ++ assert model in ["2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"], "-m must be 2d, 3d_lowres, 3d_fullres or " \ ++ "3d_cascade_fullres" ++ ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ if lowres_segmentations == "None": ++ lowres_segmentations = None ++ ++ if isinstance(folds, list): ++ if folds[0] == 'all' and len(folds) == 1: ++ pass ++ else: ++ folds = [int(i) for i in folds] ++ elif folds == "None": ++ folds = None ++ else: ++ raise ValueError("Unexpected value for argument folds") ++ ++ assert all_in_gpu in ['None', 'False', 'True'] ++ if all_in_gpu == "None": ++ all_in_gpu = None ++ elif all_in_gpu == "True": ++ all_in_gpu = True ++ elif all_in_gpu == "False": ++ all_in_gpu = False ++ ++ # we need to catch the case where model is 3d cascade fullres and the low resolution folder has not been set. ++ # In that case we need to try and predict with 3d low res first ++ if model == "3d_cascade_fullres" and lowres_segmentations is None: ++ print("lowres_segmentations is None. Attempting to predict 3d_lowres first...") ++ assert part_id == 0 and num_parts == 1, "if you don't specify a --lowres_segmentations folder for the " \ ++ "inference of the cascade, custom values for part_id and num_parts " \ ++ "are not supported. If you wish to have multiple parts, please " \ ++ "run the 3d_lowres inference first (separately)" ++ model_folder_name = join(network_training_output_dir, "3d_lowres", task_name, trainer_class_name + "__" + ++ args.plans_identifier) ++ assert isdir(model_folder_name), "model output folder not found. 
Expected: %s" % model_folder_name ++ lowres_output_folder = join(output_folder, "3d_lowres_predictions") ++ predict_from_folder(model_folder_name, input_folder, lowres_output_folder, folds, False, ++ num_threads_preprocessing, num_threads_nifti_save, None, part_id, num_parts, not disable_tta, ++ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, ++ mixed_precision=not args.disable_mixed_precision, ++ step_size=step_size) ++ lowres_segmentations = lowres_output_folder ++ torch.cuda.empty_cache() ++ print("3d_lowres done") ++ ++ if model == "3d_cascade_fullres": ++ trainer = cascade_trainer_class_name ++ else: ++ trainer = trainer_class_name ++ print(network_training_output_dir) ++ print(model) ++ print(task_name) ++ print(trainer) ++ print(args.plans_identifier) ++ model_folder_name = join(network_training_output_dir, model, task_name, trainer + "__" + ++ args.plans_identifier) ++ print("using model stored in ", model_folder_name) ++ assert isdir(model_folder_name), "model output folder not found. Expected: %s" % model_folder_name ++ ++ predict_from_folder(model_folder_name, input_folder, output_folder, folds, save_npz, num_threads_preprocessing, ++ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, not disable_tta, ++ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, ++ mixed_precision=not args.disable_mixed_precision, ++ step_size=step_size, checkpoint_name=args.chk, pre_mode=pre_mode, fp=fp) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/inference/read_bin.py b/pytorch/nnunet/inference/read_bin.py +new file mode 100644 +index 0000000..972d940 +--- /dev/null ++++ b/pytorch/nnunet/inference/read_bin.py +@@ -0,0 +1,30 @@ ++import numpy ++import pdb ++import os ++ ++ ++def read_from_bin(file_name, folder_path='/root/heyupeng/result/dumpOutput_device0/'): ++ file = os.path.join(folder_path, file_name) ++ data = numpy.fromfile(file, dtype='float32') ++ data = data.reshape(3, 128, 128, 128) ++ return data ++ ++ ++def main(): ++ file = 'liver_132_0_128_0_128_0_128_1.bin' ++ print('ready to load:', file) ++ data = numpy.fromfile(file, dtype='float32') ++ data = data.reshape(3, 128, 128, 128) ++ pdb.set_trace() ++ print(data.shape) ++ for i in range(5): ++ print(data[0, 0, 0, i*7:(i+1)*7]) ++ print('-----') ++ for i in range(5): ++ print(data[0, 0, 0, i*7+50:(i+1)*7+50]) ++ pdb.set_trace() ++ print('end\n') ++ ++ ++if __name__ == "__main__": ++ main() +\ No newline at end of file +diff --git a/pytorch/nnunet/inference/read_pkl_file.py b/pytorch/nnunet/inference/read_pkl_file.py +new file mode 100644 +index 0000000..5dcc37b +--- /dev/null ++++ b/pytorch/nnunet/inference/read_pkl_file.py +@@ -0,0 +1,22 @@ ++import numpy ++import pdb ++import os ++import pickle ++ ++ ++def read_pkl(file_name, folder_path='/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/'): ++ file = os.path.join(folder_path, file_name) ++ data = open(file, 'rb') ++ data = pickle.load(data) ++ return data ++ ++ ++def main(): ++ file = 'dataset_properties.pkl' ++ print('ready to load:', file) ++ data = read_pkl(file) ++ print('end\n') ++ ++ ++if __name__ == "__main__": ++ main() +\ No newline at end of file +diff --git a/pytorch/nnunet/inference/read_txt.py b/pytorch/nnunet/inference/read_txt.py +new file mode 100644 +index 0000000..37c94aa +--- /dev/null ++++ b/pytorch/nnunet/inference/read_txt.py +@@ -0,0 +1,29 @@ ++import numpy ++import pdb ++import os ++ ++ ++def read_from_bin(file_name, 
folder_path='/root/heyupeng/result/dumpOutput_device0/'):
++    file = os.path.join(folder_path, file_name)
++    data = numpy.loadtxt(file)
++    data = data.reshape(3, 128, 128, 128)
++    return data
++
++
++def main():
++    file = 'liver_132_0_128_0_128_0_128_1.txt'
++    print('ready to load:', file)
++    data = numpy.loadtxt(file)
++    data = data.reshape(3, 128, 128, 128)
++    pdb.set_trace()
++    print(data.shape)
++    for i in range(5):
++        print(data[0, 0, 0, i*7:(i+1)*7])
++    print('-----')
++    for i in range(5):
++        print(data[0, 0, 0, i*7+50:(i+1)*7+50])
++    pdb.set_trace()
++    print('end\n')
++
++if __name__ == "__main__":
++    main()
+diff --git a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py
+index 5c2f816..5b831ea 100644
+--- a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py
++++ b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py
+@@ -21,7 +21,8 @@ import numpy as np
+ from nnunet.network_architecture.initialization import InitWeights_He
+ from nnunet.network_architecture.neural_network import SegmentationNetwork
+ import torch.nn.functional
+-
++import pdb
++# pdb.set_trace()
+ 
+ class ConvDropoutNormNonlin(nn.Module):
+     """
+@@ -393,7 +394,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork):
+ 
+     def forward(self, x):
+         # skips = []
+-        seg_outputs = []
++        seg_outputs = []  # x is 5-dimensional
+         x0_0 = self.conv_blocks_context[0](x)
+         x1_0 = self.conv_blocks_context[1](x0_0)
+         x0_1 = self.loc4[0](torch.cat([x0_0, self.up4[0](x1_0)], 1))
+@@ -425,7 +426,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork):
+         x0_5 = self.loc0[4](torch.cat([x0_0, x0_1, x0_2, x0_3, x0_4, self.up0[4](x1_4)], 1))
+         seg_outputs.append(self.final_nonlin(self.seg_outputs[-5](x0_5)))
+ 
+-        if self._deep_supervision and self.do_ds:
++        if self._deep_supervision and self.do_ds:  # False
+             return tuple([seg_outputs[-1]] + [i(j) for i, j in
+                                               zip(list(self.upscale_logits_ops)[::-1], seg_outputs[:-1][::-1])])
+         else:
+diff --git a/pytorch/nnunet/network_architecture/neural_network.py b/pytorch/nnunet/network_architecture/neural_network.py
+index baa8a05..9425fe9 100644
+--- a/pytorch/nnunet/network_architecture/neural_network.py
++++ b/pytorch/nnunet/network_architecture/neural_network.py
+@@ -21,8 +21,14 @@ from torch import nn
+ import torch
+ from scipy.ndimage.filters import gaussian_filter
+ from typing import Union, Tuple, List
++import os
+ 
+ from torch.cuda.amp import autocast
++import pdb
++from glob import glob
++import time
++from nnunet.inference.read_bin import read_from_bin
++from nnunet.inference.infer_path import INFERENCE_SHAPE_PATH, INFERENCE_BIN_INPUT_FOLDER, INFERENCE_BIN_OUTPUT_FOLDER
+ 
+ 
+ class NeuralNetwork(nn.Module):
+@@ -75,7 +81,8 @@ class SegmentationNetwork(NeuralNetwork):
+                    step_size: float = 0.5, patch_size: Tuple[int, ...] = None, regions_class_order: Tuple[int, ...] = None,
+                    use_gaussian: bool = False, pad_border_mode: str = "constant",
+                    pad_kwargs: dict = None, all_in_gpu: bool = False,
+-                   verbose: bool = True, mixed_precision: bool = True) -> Tuple[np.ndarray, np.ndarray]:
++                   verbose: bool = True, mixed_precision: bool = True, img_name=None,
++                   pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]:
+         """
+         Use this function to predict a 3D image. It does not matter whether the network is a 2D or 3D U-Net, it will
+         detect that automatically and run the appropriate code.
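The patch exchanges tensors with the offline Ascend 310 inference step as headerless raw float32 files, so the reader must know the logical shape in advance. A minimal round-trip sketch of that convention, with a hypothetical file name (the real files encode image name, tile bounds and mirror index):

    import numpy as np

    # Write a (3, 128, 128, 128) float32 patch as a raw, headerless bin file,
    # then read it back the way read_bin.read_from_bin does.
    patch = np.random.rand(3, 128, 128, 128).astype(np.float32)
    patch.tofile('liver_example_0_128_0_128_0_128_1.bin')  # no header, raw bytes

    restored = np.fromfile('liver_example_0_128_0_128_0_128_1.bin', dtype='float32')
    restored = restored.reshape(3, 128, 128, 128)  # shape must be known a priori
    assert np.array_equal(patch, restored)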
+@@ -133,7 +140,7 @@ class SegmentationNetwork(NeuralNetwork):
+ 
+         assert len(x.shape) == 4, "data must have shape (c,x,y,z)"
+ 
+-        if mixed_precision:
++        if mixed_precision:  # True
+             context = autocast
+         else:
+             context = no_op
+@@ -141,11 +148,11 @@ class SegmentationNetwork(NeuralNetwork):
+         with context():
+             with torch.no_grad():
+                 if self.conv_op == nn.Conv3d:
+-                    if use_sliding_window:
++                    if use_sliding_window:  # this branch is taken
+                         res = self._internal_predict_3D_3Dconv_tiled(x, step_size, do_mirroring, mirror_axes, patch_size,
+                                                                      regions_class_order, use_gaussian, pad_border_mode,
+                                                                      pad_kwargs=pad_kwargs, all_in_gpu=all_in_gpu,
+-                                                                     verbose=verbose)
++                                                                     verbose=verbose, img_name=img_name, pre_mode=pre_mode, fp=fp)
+                     else:
+                         res = self._internal_predict_3D_3Dconv(x, patch_size, do_mirroring, mirror_axes, regions_class_order,
+                                                                pad_border_mode, pad_kwargs=pad_kwargs, verbose=verbose)
+@@ -284,19 +291,161 @@ class SegmentationNetwork(NeuralNetwork):
+ 
+         return steps
+ 
++    # def _internal_predict_3D_3Dconv_tiled(self, x: np.ndarray, step_size: float, do_mirroring: bool, mirror_axes: tuple,
++    #                                       patch_size: tuple, regions_class_order: tuple, use_gaussian: bool,
++    #                                       pad_border_mode: str, pad_kwargs: dict, all_in_gpu: bool,
++    #                                       verbose: bool, img_name=None) -> Tuple[np.ndarray, np.ndarray]:
++    #     # better safe than sorry
++    #     assert len(x.shape) == 4, "x must be (c, x, y, z)"
++    #     assert self.get_device() != "cpu"
++    #     if verbose: print("step_size:", step_size)  # 0.5
++    #     if verbose: print("do mirror:", do_mirroring)  # True
++    #
++    #     torch.cuda.empty_cache()
++    #
++    #     assert patch_size is not None, "patch_size cannot be None for tiled prediction"  # 128, 128, 128
++    #
++    #     # for sliding window inference the image must at least be as large as the patch size. It does not matter
++    #     # whether the shape is divisible by 2**num_pool as long as the patch size is
++    #     data, slicer = pad_nd_image(x, patch_size, pad_border_mode, pad_kwargs, True, None)
++    #     data_shape = data.shape  # still c, x, y, z
++    #
++    #     # compute the steps for sliding window
++    #     steps = self._compute_steps_for_sliding_window(patch_size, data_shape[1:], step_size)  # compute the window positions
++    #     num_tiles = len(steps[0]) * len(steps[1]) * len(steps[2])
++    #
++    #     if verbose:
++    #         print("data shape:", data_shape)
++    #         print("patch size:", patch_size)
++    #         print("steps (x, y, and z):", steps)
++    #         print("number of tiles:", num_tiles)
++    #
++    #     # we only need to compute that once. It can take a while to compute this due to the large sigma in
++    #     # gaussian_filter
++    #     if use_gaussian and num_tiles > 1:
++    #         if self._gaussian_3d is None or not all(  # this branch is taken
++    #                 [i == j for i, j in zip(patch_size, self._patch_size_for_gaussian_3d)]):
++    #             if verbose: print('computing Gaussian')
++    #             gaussian_importance_map = self._get_gaussian(patch_size, sigma_scale=1. / 8)
++    #
++    #             self._gaussian_3d = gaussian_importance_map
++    #             self._patch_size_for_gaussian_3d = patch_size
++    #         else:
++    #             if verbose: print("using precomputed Gaussian")
++    #             gaussian_importance_map = self._gaussian_3d
++    #
++    #         gaussian_importance_map = torch.from_numpy(gaussian_importance_map).cuda(self.get_device(),
++    #                                                                                  non_blocking=True)
++    #     else:
++    #         gaussian_importance_map = None
++    #     if all_in_gpu:  # False
++    #         # If we run the inference in GPU only (meaning all tensors are allocated on the GPU, this reduces
++    #         # CPU-GPU communication but required more GPU memory) we need to preallocate a few things on GPU
++    #         if use_gaussian and num_tiles > 1:
++    #             # half precision for the outputs should be good enough. If the outputs here are half, the
++    #             # gaussian_importance_map should be as well
++    #             gaussian_importance_map = gaussian_importance_map.half()
++    #
++    #             # make sure we did not round anything to 0
++    #             gaussian_importance_map[gaussian_importance_map == 0] = gaussian_importance_map[
++    #                 gaussian_importance_map != 0].min()
++    #
++    #             add_for_nb_of_preds = gaussian_importance_map
++    #         else:
++    #             add_for_nb_of_preds = torch.ones(data.shape[1:], device=self.get_device())
++    #
++    #         if verbose: print("initializing result array (on GPU)")
++    #         aggregated_results = torch.zeros([self.num_classes] + list(data.shape[1:]), dtype=torch.half,
++    #                                          device=self.get_device())
++    #
++    #         if verbose: print("moving data to GPU")
++    #         data = torch.from_numpy(data).cuda(self.get_device(), non_blocking=True)
++    #
++    #         if verbose: print("initializing result_numsamples (on GPU)")
++    #         aggregated_nb_of_predictions = torch.zeros([self.num_classes] + list(data.shape[1:]), dtype=torch.half,
++    #                                                    device=self.get_device())
++    #     else:
++    #         if use_gaussian and num_tiles > 1:  # this branch is taken
++    #             add_for_nb_of_preds = self._gaussian_3d  # 128 128 128
++    #         else:
++    #             add_for_nb_of_preds = np.ones(data.shape[1:], dtype=np.float32)
++    #         aggregated_results = np.zeros([self.num_classes] + list(data.shape[1:]), dtype=np.float32)
++    #         aggregated_nb_of_predictions = np.zeros([self.num_classes] + list(data.shape[1:]), dtype=np.float32)
++    #     for x in steps[0]:
++    #         lb_x = x
++    #         ub_x = x + patch_size[0]
++    #         for y in steps[1]:
++    #             lb_y = y
++    #             ub_y = y + patch_size[1]
++    #             for z in steps[2]:
++    #                 lb_z = z
++    #                 ub_z = z + patch_size[2]
++    #                 predicted_patch = self._internal_maybe_mirror_and_pred_3D(  # data is an ndarray
++    #                     data[None, :, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z], mirror_axes, do_mirroring,
++    #                     gaussian_importance_map)[0]  # -> tensor 3 128 128 128, dtype=float32
++    #                 if all_in_gpu:  # False
++    #                     predicted_patch = predicted_patch.half()
++    #                 else:
++    #                     predicted_patch = predicted_patch.cpu().numpy()
++    #
++    #                 aggregated_results[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += predicted_patch  # 3 437 309 570
++    #                 aggregated_nb_of_predictions[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += add_for_nb_of_preds  # 3 437 309 570
++    #
++    #     # we reverse the padding here (remember that we padded the input to be at least as large as the patch size
++    #     slicer = tuple(
++    #         [slice(0, aggregated_results.shape[i]) for i in
++    #          range(len(aggregated_results.shape) - (len(slicer) - 1))] + slicer[1:])
++    #     aggregated_results = aggregated_results[slicer]  # shape unchanged
++    #     aggregated_nb_of_predictions = aggregated_nb_of_predictions[slicer]
++    #
++    #     # computing the class_probabilities by dividing the aggregated result with result_numsamples
++    #     class_probabilities = aggregated_results / aggregated_nb_of_predictions  # same shape
++    #
++    #     if regions_class_order is None:  # None
++    #         predicted_segmentation = class_probabilities.argmax(0)
++    #     else:
++    #         if all_in_gpu:
++    #             class_probabilities_here = class_probabilities.detach().cpu().numpy()
++    #         else:
++    #             class_probabilities_here = class_probabilities
++    #         predicted_segmentation = np.zeros(class_probabilities_here.shape[1:], dtype=np.float32)
++    #         for i, c in enumerate(regions_class_order):
++    #             predicted_segmentation[class_probabilities_here[i] > 0.5] = c
++    #
++    #     if all_in_gpu:  # False
++    #         if verbose: print("copying results to CPU")
++    #
++    #         if regions_class_order is None:
++    #             predicted_segmentation = predicted_segmentation.detach().cpu().numpy()
++    #
++    #         class_probabilities = class_probabilities.detach().cpu().numpy()
++    #
++    #     if verbose: print("prediction done")  # True
++    #     return predicted_segmentation, class_probabilities
++
++    def print_mytensor(data):
++        shape = data.shape[0]
++        for s in range(shape):
++            for i in range(3):
++                print(data[s, 0, 0, i * 3:(i + 1) * 3])
++            for i in range(3):
++                print(data[s, 0, 0, i * 3 + 50:(i + 1) * 3 + 50])
++            print('-----')
++
++    # This function was modified specifically to save the cropped sub-images as bin files.
++    # After use, comment this function out again and restore the original.
+     def _internal_predict_3D_3Dconv_tiled(self, x: np.ndarray, step_size: float, do_mirroring: bool, mirror_axes: tuple,
+                                           patch_size: tuple, regions_class_order: tuple, use_gaussian: bool,
+                                           pad_border_mode: str, pad_kwargs: dict, all_in_gpu: bool,
+-                                          verbose: bool) -> Tuple[np.ndarray, np.ndarray]:
++                                          verbose: bool, img_name=None, pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]:
+         # better safe than sorry
+         assert len(x.shape) == 4, "x must be (c, x, y, z)"
+         assert self.get_device() != "cpu"
+-        if verbose: print("step_size:", step_size)
+-        if verbose: print("do mirror:", do_mirroring)
++        if verbose: print("step_size:", step_size)  # 0.5
++        if verbose: print("do mirror:", do_mirroring)  # True
+ 
+         torch.cuda.empty_cache()
+ 
+-        assert patch_size is not None, "patch_size cannot be None for tiled prediction"
++        assert patch_size is not None, "patch_size cannot be None for tiled prediction"  # 128, 128, 128
+ 
+         # for sliding window inference the image must at least be as large as the patch size. It does not matter
+         # whether the shape is divisible by 2**num_pool as long as the patch size is
+@@ -304,7 +453,7 @@ class SegmentationNetwork(NeuralNetwork):
+         data_shape = data.shape  # still c, x, y, z
+ 
+         # compute the steps for sliding window
+-        steps = self._compute_steps_for_sliding_window(patch_size, data_shape[1:], step_size)
++        steps = self._compute_steps_for_sliding_window(patch_size, data_shape[1:], step_size)  # compute the window positions
+         num_tiles = len(steps[0]) * len(steps[1]) * len(steps[2])
+ 
+         if verbose:
+@@ -316,7 +465,7 @@ class SegmentationNetwork(NeuralNetwork):
+         # we only need to compute that once. It can take a while to compute this due to the large sigma in
+         # gaussian_filter
+         if use_gaussian and num_tiles > 1:
+-            if self._gaussian_3d is None or not all(
++            if self._gaussian_3d is None or not all(  # this branch is taken
+                     [i == j for i, j in zip(patch_size, self._patch_size_for_gaussian_3d)]):
+                 if verbose: print('computing Gaussian')
+                 gaussian_importance_map = self._get_gaussian(patch_size, sigma_scale=1. / 8)
+@@ -327,16 +476,16 @@ class SegmentationNetwork(NeuralNetwork):
+                 if verbose: print("using precomputed Gaussian")
+                 gaussian_importance_map = self._gaussian_3d
+ 
+-            gaussian_importance_map = torch.from_numpy(gaussian_importance_map).cuda(self.get_device(),
+-                                                                                     non_blocking=True)
+-
++            # gaussian_importance_map = torch.from_numpy(gaussian_importance_map).cuda(self.get_device(),
++            #                                                                          non_blocking=True)
++            gaussian_importance_map = torch.from_numpy(gaussian_importance_map)
+         else:
+             gaussian_importance_map = None
+-
+-        if all_in_gpu:
++        aggregated_results = torch.zeros(1)
++        aggregated_nb_of_predictions = torch.zeros(1)
++        if all_in_gpu:  # False
+             # If we run the inference in GPU only (meaning all tensors are allocated on the GPU, this reduces
+             # CPU-GPU communication but required more GPU memory) we need to preallocate a few things on GPU
+-
+             if use_gaussian and num_tiles > 1:
+                 # half precision for the outputs should be good enough. If the outputs here are half, the
+                 # gaussian_importance_map should be as well
+@@ -361,13 +510,80 @@ class SegmentationNetwork(NeuralNetwork):
+             aggregated_nb_of_predictions = torch.zeros([self.num_classes] + list(data.shape[1:]), dtype=torch.half,
+                                                        device=self.get_device())
+         else:
+-            if use_gaussian and num_tiles > 1:
+-                add_for_nb_of_preds = self._gaussian_3d
++            if use_gaussian and num_tiles > 1:  # this branch is taken
++                add_for_nb_of_preds = self._gaussian_3d  # 128 128 128
+             else:
+                 add_for_nb_of_preds = np.ones(data.shape[1:], dtype=np.float32)
+             aggregated_results = np.zeros([self.num_classes] + list(data.shape[1:]), dtype=np.float32)
+             aggregated_nb_of_predictions = np.zeros([self.num_classes] + list(data.shape[1:]), dtype=np.float32)
+-
++        # path setup
++        shape_path = INFERENCE_SHAPE_PATH  # directory that holds all_shape.txt
++        if fp is None or fp == 'None':
++            bin_save_folder = INFERENCE_BIN_INPUT_FOLDER  # directory for the sub-image bin files
++            bin_real_folder = INFERENCE_BIN_OUTPUT_FOLDER  # directory with all results of the Ascend 310 inference
++        else:
++            bin_save_folder = fp  # directory for the sub-image bin files
++            bin_real_folder = fp  # directory with all results of the Ascend 310 inference
++        def save_as_shape(filename, shape, steps, folder=None):
++            shape_txt = 'all_shape.txt'
++            file = os.path.join(folder, shape_txt)
++            with open(file, "w") as f:
++                folders = filename.split('/')
++                name = folders[-1].split('.')[0]
++                s = name + ' ' + str(int(shape[1])) + ' ' + str(int(shape[2])) + ' ' + str(int(shape[3]))
++                s1 = ','.join(str(s) for s in steps[0])
++                s2 = ','.join(str(s) for s in steps[1])
++                s3 = ','.join(str(s) for s in steps[2])
++                s = s + '-' + s1 + '-' + s2 + '-' + s3
++                s = s + '\n'
++                f.write(s)  # saved format: image name, then its x, y and z extent
++        def save_as_bin(data, steps, filename, folder=None):  # save the sub-image as bin files
++            """
++            data: sub-image, ndarray 1 128 128 128
++            steps: region the sub-image occupies within the original image
++            filename: file name of the original image, e.g. liver_132.nii.gz
++            folder: destination folder for the final bin files
++            """
++            x = maybe_to_torch(data)  # 3 128 128 128
++            folders = filename.split('/')
++            name = folders[-1]
++            name = name.split('.')[0]  # extract the base file name
++            for s in steps:
++                name = name + '_' + str(s)  # append the position info to the file name
++            for cur_i in range(8):
++                if cur_i == 0:
++                    y = x  # 1 3 128 128 128
++                if cur_i == 1:
++                    y = torch.flip(x, (3,))
++                if cur_i == 2:
++                    y = torch.flip(x, (2,))
++                if cur_i == 3:
++                    y = torch.flip(x, (3, 2))
++                if cur_i == 4:
++                    y = torch.flip(x, (1,))
++                if cur_i == 5:
++                    y = torch.flip(x, (3, 1))
++                if cur_i == 6:
++                    y = torch.flip(x, (2, 1))
++                if cur_i == 7:
++                    y = torch.flip(x, (3, 2, 1))
++                img = np.array(y).astype(np.float32)
++                file_path = os.path.join(folder, name + '_' + str(cur_i) + ".bin")
++                img.tofile(file_path)
++        # mode selection
++        if int(pre_mode) == 1:  # split the image into patches
++            save_as_shape_flag = True
++            save_as_bin_flag = True
++            consolidated_bins = False
++        elif int(pre_mode) == 2:  # merge the inference results
++            save_as_shape_flag = False
++            save_as_bin_flag = False
++            consolidated_bins = True
++        else:
++            raise Exception('An inference mode (pre_mode) must be provided to avoid mistakes!')
++        # record the global shape information; it is needed later to merge the results
++        if save_as_shape_flag:
++            save_as_shape(img_name, data_shape, steps, shape_path)
+         for x in steps[0]:
+             lb_x = x
+             ub_x = x + patch_size[0]
+@@ -377,30 +593,108 @@ class SegmentationNetwork(NeuralNetwork):
+                 for z in steps[2]:
+                     lb_z = z
+                     ub_z = z + patch_size[2]
+-
++                    if save_as_bin_flag:
++                        # data is 4-D here but becomes 5-D before entering the model; the bins are saved as 4-D
++                        cur_data = data[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z]  # the current patch, 128 128 128
++                        save_as_bin(cur_data, [lb_x, ub_x, lb_y, ub_y, lb_z, ub_z], img_name, bin_save_folder)
++                    continue
+                     predicted_patch = self._internal_maybe_mirror_and_pred_3D(
+-                        data[None, :, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z], mirror_axes, do_mirroring,
+-                        gaussian_importance_map)[0]
+-
+-                    if all_in_gpu:
++                        cur_data, mirror_axes, do_mirroring,
++                        gaussian_importance_map)[0]  # -> 3 128 128 128
++                    if all_in_gpu:  # False
+                         predicted_patch = predicted_patch.half()
+                     else:
+                         predicted_patch = predicted_patch.cpu().numpy()
+-
+-                    aggregated_results[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += predicted_patch
+-                    aggregated_nb_of_predictions[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += add_for_nb_of_preds
+-
++                    aggregated_results[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += predicted_patch  # 3 437 309 570
++                    aggregated_nb_of_predictions[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += add_for_nb_of_preds  # 3 437 309 570
++        # at this point steps, aggregated_results and aggregated_nb_of_predictions are no longer visible
++        # data_shape and num_classes could also be recovered from the file name
++        if consolidated_bins:
++            shape_path = shape_path + 'all_shape.txt'
++            with open(shape_path) as f:
++                all_shape = f.readline().replace('\n', '').replace('\r', '')
++                all_shape = all_shape.split('-')
++                all_1 = all_shape[0].split(' ')
++                bin_img_name = all_1[0]
++                bin_data_shape = (1, int(all_1[1]), int(all_1[2]), int(all_1[3]))
++                bin_steps = []
++                for bin_i in range(3):
++                    bin_list = []
++                    all_2 = all_shape[bin_i + 1].split(',')
++                    for sth in all_2:
++                        bin_list.append(int(sth))
++                    bin_steps.append(bin_list)
++            for x in bin_steps[0]:
++                lb_x = x
++                ub_x = x + patch_size[0]
++                for y in bin_steps[1]:
++                    lb_y = y
++                    ub_y = y + patch_size[1]
++                    for z in bin_steps[2]:
++                        lb_z = z
++                        ub_z = z + patch_size[2]
++                        bin_step = [lb_x, ub_x, lb_y, ub_y, lb_z, ub_z]
++                        bin_file_name = bin_img_name
++                        for bin_s in bin_step:
++                            bin_file_name = bin_file_name + '_' + str(bin_s)
++                        result_torch = torch.zeros([1, 3, 128, 128, 128], dtype=torch.float)  # 1 3 128 128 128
++                        num_results = 8
++                        mult = gaussian_importance_map.cpu()
++                        for zz in range(8):
++                            bin_file_name_zz = bin_file_name + '_' + str(zz)
++                            bin_file_name_zzz = bin_file_name_zz + '_1.bin'
++                            bin_predicted_patch = read_from_bin(bin_file_name_zzz, bin_real_folder)  # ndarray, 3 128 128 128, float32
++                            bin_predicted_patch = bin_predicted_patch[None, :, :, :, :]  # expand to 5-D
++                            bin_predicted_patch = maybe_to_torch(bin_predicted_patch)  # tensor, 3 128 128 128, float32
++                            if zz == 0:
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)  # 1 3 128 128 128
++                                result_torch += 1 / num_results * pred  # 1 3 128 128 128
++                            if zz == 1 and (2 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (4,))
++                            if zz == 2 and (1 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (3,))
++                            if zz == 3 and (2 in mirror_axes) and (1 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (4, 3))
++                            if zz == 4 and (0 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (2,))
++                            if zz == 5 and (0 in mirror_axes) and (2 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (4, 2))
++                            if zz == 6 and (0 in mirror_axes) and (1 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (3, 2))
++                            if zz == 7 and (0 in mirror_axes) and (1 in mirror_axes) and (2 in mirror_axes):
++                                pred = self.inference_apply_nonlin(bin_predicted_patch)
++                                result_torch += 1 / num_results * torch.flip(pred, (4, 3, 2))
++                        result_torch[:, :] *= mult  # torch 1 3 128 128 128 float32
++                        result_torch = result_torch[0]  # down to 3 128 128 128
++                        bin_predicted_patch = result_torch
++                        # the eight mirrored runs of this sub-image are now consolidated. Originally only the first
++                        # tile was processed before breaking; to stop early again, re-enable the three breaks at the
++                        # end of the triple loop.
++                        bin_predicted_patch = bin_predicted_patch.cpu().numpy()
++                        aggregated_results[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += bin_predicted_patch  # 3 437 309 570
++                        aggregated_nb_of_predictions[:, lb_x:ub_x, lb_y:ub_y, lb_z:ub_z] += add_for_nb_of_preds  # 3 437 309 570
++                        # break
++                    # break
++                # break
++        else:
++            import sys
++            sys.exit(0)
+         # we reverse the padding here (remember that we padded the input to be at least as large as the patch size
+         slicer = tuple(
+             [slice(0, aggregated_results.shape[i]) for i in
+              range(len(aggregated_results.shape) - (len(slicer) - 1))] + slicer[1:])
+-        aggregated_results = aggregated_results[slicer]
++        aggregated_results = aggregated_results[slicer]  # shape unchanged
+         aggregated_nb_of_predictions = aggregated_nb_of_predictions[slicer]
+ 
+         # computing the class_probabilities by dividing the aggregated result with result_numsamples
+-        class_probabilities = aggregated_results / aggregated_nb_of_predictions
++        class_probabilities = aggregated_results / aggregated_nb_of_predictions  # same shape
+ 
+-        if regions_class_order is None:
++        if regions_class_order is None:  # None
+             predicted_segmentation = class_probabilities.argmax(0)
+         else:
+             if all_in_gpu:
+@@ -411,7 +705,7 @@ class SegmentationNetwork(NeuralNetwork):
+             for i, c in enumerate(regions_class_order):
+                 predicted_segmentation[class_probabilities_here[i] > 0.5] = c
+ 
+-        if all_in_gpu:
++        if all_in_gpu:  # False
+             if verbose: print("copying results to CPU")
+ 
+             if regions_class_order is None:
+@@ -419,7 +713,7 @@ class SegmentationNetwork(NeuralNetwork):
+ 
+             class_probabilities = class_probabilities.detach().cpu().numpy()
+ 
+-        if verbose: print("prediction done")
++        if verbose: print("prediction done")  # True
+         return predicted_segmentation, class_probabilities
+ 
+     def _internal_predict_2D_2Dconv(self, x: np.ndarray, min_size: Tuple[int, int], do_mirroring: bool,
+@@ -504,54 +798,69 @@ class SegmentationNetwork(NeuralNetwork):
+         assert len(x.shape) == 5, 'x must be (b, c, x, y, z)'
+         # everything in here takes place on the GPU. If x and mult are not yet on GPU this will be taken care of here
+         # we now return a cuda tensor! Not numpy array!
+-
+-        x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device())
++        def print_mytensor(data):
++            shape = data.shape[0]
++            for s in range(shape):
++                for i in range(3):
++                    print(data[s, 0, 0, i * 3:(i + 1) * 3])
++                for i in range(3):
++                    print(data[s, 0, 0, i * 3 + 50:(i + 1) * 3 + 50])
++                print('-----')
++        x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device())  # ndarray, 1 1 128 128 128, becomes a tensor here
+         result_torch = torch.zeros([1, self.num_classes] + list(x.shape[2:]),
+-                                   dtype=torch.float).cuda(self.get_device(), non_blocking=True)
++                                   dtype=torch.float).cuda(self.get_device(), non_blocking=True)  # 1 3 128 128 128, all zeros
+ 
+         if mult is not None:
+-            mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device())
++            mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device())  # tensor, 128 128 128
+ 
+-        if do_mirroring:
++        if do_mirroring:  # True
+             mirror_idx = 8
+-            num_results = 2 ** len(mirror_axes)
++            num_results = 2 ** len(mirror_axes)  # 8
+         else:
+             mirror_idx = 1
+             num_results = 1
+         for m in range(mirror_idx):
+             if m == 0:
+-                pred = self.inference_apply_nonlin(self(x))
+-                result_torch += 1 / num_results * pred
++                y = self(x)  # tensor, 1 3 128 128 128
++                pred = self.inference_apply_nonlin(y)  # 1 3 128 128 128
++                result_torch += 1 / num_results * pred  # 1 3 128 128 128
+ 
+             if m == 1 and (2 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (4, ))))
++                y = self(torch.flip(x, (4, )))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (4,))
+ 
+             if m == 2 and (1 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (3, ))))
++                y = self(torch.flip(x, (3, )))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (3,))
+ 
+             if m == 3 and (2 in mirror_axes) and (1 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3))))
++                y = self(torch.flip(x, (4, 3)))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (4, 3))
+ 
+             if m == 4 and (0 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (2, ))))
++                y = self(torch.flip(x, (2, )))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (2,))
+ 
+             if m == 5 and (0 in mirror_axes) and (2 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 2))))
++                y = self(torch.flip(x, (4, 2)))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (4, 2))
+ 
+             if m == 6 and (0 in mirror_axes) and (1 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (3, 2))))
++                y = self(torch.flip(x, (3, 2)))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (3, 2))
+ 
+             if m == 7 and (0 in mirror_axes) and (1 in mirror_axes) and (2 in mirror_axes):
+-                pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3, 2))))
++                y = self(torch.flip(x, (4, 3, 2)))
++                pred = self.inference_apply_nonlin(y)
+                 result_torch += 1 / num_results * torch.flip(pred, (4, 3, 2))
+ 
+-        if mult is not None:
++        if mult is not None:  # True
+             result_torch[:, :] *= mult
+ 
+         return result_torch
+diff --git a/pytorch/nnunet/postprocessing/connected_components.py b/pytorch/nnunet/postprocessing/connected_components.py
+index c69471e..45ff991 100644
+--- a/pytorch/nnunet/postprocessing/connected_components.py
++++ b/pytorch/nnunet/postprocessing/connected_components.py
+@@ -175,7 +175,7 @@ def determine_postprocessing(base, gt_labels_folder, 
raw_subfolder_name="validat + pp_results['num_samples'] = len(validation_result_raw['all']) + validation_result_raw = validation_result_raw['mean'] + +- if advanced_postprocessing: ++ if advanced_postprocessing: # False + # first treat all foreground classes as one and remove all but the largest foreground connected component + results = [] + for f in fnames: +@@ -270,12 +270,12 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + if len(classes) > 1: + # now depending on whether we do remove all but the largest foreground connected component we define the source dir + # for the next one to be the raw or the temp dir +- if do_fg_cc: ++ if do_fg_cc: # True + source = folder_all_classes_as_fg + else: + source = join(base, raw_subfolder_name) + +- if advanced_postprocessing: ++ if advanced_postprocessing: # False + # now run this for each class separately + results = [] + for f in fnames: +@@ -325,7 +325,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + json_output_file=join(folder_per_class, "summary.json"), + json_author="Fabian", num_threads=processes) + +- if do_fg_cc: ++ if do_fg_cc: # True + old_res = deepcopy(validation_result_PP_test) + else: + old_res = validation_result_raw +@@ -350,7 +350,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + else: + print("Only one class present, no need to do each class separately as this is covered in fg vs bg") + +- if not advanced_postprocessing: ++ if not advanced_postprocessing: # True + pp_results['min_valid_object_sizes'] = None + + print("done") +diff --git a/pytorch/nnunet/preprocessing/cropping.py b/pytorch/nnunet/preprocessing/cropping.py +index bb0a92a..95d07bc 100644 +--- a/pytorch/nnunet/preprocessing/cropping.py ++++ b/pytorch/nnunet/preprocessing/cropping.py +@@ -39,6 +39,7 @@ def get_bbox_from_mask(mask, outside_value=0): + maxxidx = int(np.max(mask_voxel_coords[1])) + 1 + minyidx = int(np.min(mask_voxel_coords[2])) + maxyidx = int(np.max(mask_voxel_coords[2])) + 1 ++ print(mask.shape, minzidx, maxzidx, minxidx, maxxidx, minyidx, maxyidx) + return [[minzidx, maxzidx], [minxidx, maxxidx], [minyidx, maxyidx]] + + +@@ -202,6 +203,7 @@ class ImageCropper(object): + list_of_args.append((case, case_identifier, overwrite_existing)) + + p = Pool(self.num_threads) ++ print('Pool', self.num_threads) + p.starmap(self.load_crop_save, list_of_args) + p.close() + p.join() +diff --git a/pytorch/nnunet/run/look_pkl.py b/pytorch/nnunet/run/look_pkl.py +new file mode 100644 +index 0000000..1a9d78a +--- /dev/null ++++ b/pytorch/nnunet/run/look_pkl.py +@@ -0,0 +1,18 @@ ++import numpy as np ++import pickle ++ ++inputfile = u'/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver' \ ++ u'/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model.pkl' ++# test = np.load('labels.npy', encoding = "latin1") ++# doc = open('1.txt', 'a') ++# print(test, file=doc) ++ ++ ++ ++fr = open(inputfile, 'rb') ++inf = pickle.load(fr) ++print('done') ++ ++ ++ ++print('end') +\ No newline at end of file +diff --git a/pytorch/nnunet/run/model_prof.py b/pytorch/nnunet/run/model_prof.py +new file mode 100644 +index 0000000..013df26 +--- /dev/null ++++ b/pytorch/nnunet/run/model_prof.py +@@ -0,0 +1,124 @@ ++# Copyright (c) Soumith Chintala 2016, ++# All rights reserved ++# ++# Copyright 2020 Huawei Technologies Co., Ltd ++# ++# Licensed under the BSD 3-Clause License (the "License"); ++# you may not use this file except in 
compliance with the License.
++# You may obtain a copy of the License at
++#
++# https://spdx.org/licenses/BSD-3-Clause.html
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++
++# -*- coding: utf-8 -*-
++"""pytorch_prof.py
++"""
++
++import torch
++import torch.optim as optim
++import torch.nn as nn
++import time
++import argparse
++
++
++def build_model():
++    # define your own model here and load its pretrained weights
++    import torchvision
++    model = torchvision.models.resnet50(pretrained=True)
++    return model
++
++
++def get_raw_data():
++    input_tensor = torch.randn(2, 3, 224, 224)
++    return input_tensor
++
++
++def criterion(x):
++    base_func = nn.CrossEntropyLoss()
++    shape_list = x.shape
++    N = shape_list[0]
++    R = 1
++    if len(shape_list) > 1:
++        for r in shape_list[1:]:
++            R *= r
++    T = torch.randint(0, R, size=(N,)).to(x.device)
++    if str(T.device).startswith('npu'):
++        T = T.int()
++    return base_func(x.reshape(N, -1), T)
++
++
++if __name__ == '__main__':
++    parser = argparse.ArgumentParser(description='PyTorch Prof')
++    parser.add_argument('--device', type=str, default='cpu',
++                        help='set which type of device used. Support cuda:0(device_id), npu:0(device_id).')
++    parser.add_argument('--amp', default=False, action='store_true',
++                        help='use amp during prof')
++    parser.add_argument('--loss-scale', default=64.0, type=float,
++                        help='loss scale using in amp, default 64.0, -1 means dynamic')
++    parser.add_argument('--opt-level', default='O2', type=str,
++                        help='opt-level using in amp, default O2')
++    parser.add_argument('--FusedSGD', default=False, action='store_true',
++                        help='use FusedSGD during prof')
++
++    args = parser.parse_args()
++
++    # 1. preparation
++    if args.device.startswith('cuda'):
++        torch.cuda.set_device(args.device)
++        prof_kwargs = {'use_cuda': True}
++    elif args.device.startswith('npu'):
++        torch.npu.set_device(args.device)
++        prof_kwargs = {'use_npu': True}
++    else:
++        prof_kwargs = {}
++
++    # 2. build the model
++    model = build_model()
++    if args.FusedSGD:
++        from apex.optimizers import NpuFusedSGD
++        optimizer = NpuFusedSGD(model.parameters(), lr=0.01)
++        model = model.to(args.device)
++        if args.amp:
++            from apex import amp
++            model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
++                                              loss_scale=None if args.loss_scale == -1 else args.loss_scale,
++                                              combine_grad=True)
++    else:
++        optimizer = optim.SGD(model.parameters(), lr=0.01)
++        model = model.to(args.device)
++        if args.amp:
++            from apex import amp
++            model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
++                                              loss_scale=None if args.loss_scale == -1 else args.loss_scale)
++
++    # 3. generate the input
++    input_tensor = get_raw_data()
++    input_tensor = input_tensor.to(args.device)
++
++    # run once beforehand so that the profiled performance is accurate
++    def run():
++        output_tensor = model(input_tensor)
++        optimizer.zero_grad()
++        loss = criterion(output_tensor)
++        if args.amp:
++            with amp.scale_loss(loss, optimizer) as scaled_loss:
++                scaled_loss.backward()
++        else:
++            loss.backward()
++        optimizer.step()
++        return loss
++    for i in range(5):
++        start_time = time.time()
++        loss = run()
++        print('iter: %d, loss: %.2f, time: %.2f' % (i, loss, (time.time() - start_time)*1000))
++
++    # 4. run the forward pass under the profiler
++    with torch.autograd.profiler.profile(**prof_kwargs) as prof:
++        run()
++    print(prof.key_averages().table())
++    prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device)
+\ No newline at end of file
+diff --git a/pytorch/nnunet/run/run_training.py b/pytorch/nnunet/run/run_training.py
+index eb7ca2f..08214d6 100644
+--- a/pytorch/nnunet/run/run_training.py
++++ b/pytorch/nnunet/run/run_training.py
+@@ -31,7 +31,7 @@ def main():
+     parser.add_argument("task", help="can be task name or task id")
+     parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'')
+     parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation",
+-                        action="store_true")
++                        action="store_true", default=True)
+     parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis")
+     parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training",
+                         action="store_true")
+@@ -134,8 +134,8 @@ def main():
+                             fp16=run_mixed_precision)
+ 
+     trainer.initialize(not validation_only)
+-
+-    if weights != None:
++
++    if weights != None:
+         trainer.load_pretrained_encoder_weights(weights)
+     sys.stdout.flush()
+ 
+diff --git a/pytorch/nnunet/run/run_training2.py b/pytorch/nnunet/run/run_training2.py
+new file mode 100644
+index 0000000..372a4d4
+--- /dev/null
++++ b/pytorch/nnunet/run/run_training2.py
+@@ -0,0 +1,172 @@
++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
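Because every option of run_training2.py below carries a default, the script can be driven without any CLI arguments. A hedged usage sketch, assuming the package is installed editable so that nnunet.run.run_training2 is importable:

    import sys
    from nnunet.run import run_training2  # assumed import path for the new module

    # With an empty argv the defaults apply: network 3d_fullres, trainer
    # nnUNetPlusPlusTrainerV2, task 003, fold 0.
    sys.argv = ['run_training2.py']
    run_training2.main()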
++import os ++import sys ++import argparse ++from batchgenerators.utilities.file_and_folder_operations import * ++from nnunet.run.default_configuration import get_default_configuration ++from nnunet.paths import default_plans_identifier ++from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes ++from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes ++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++ ++ ++# import pdb ++# pdb.set_trace() ++ ++def main(): ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-network", default="3d_fullres") ++ parser.add_argument("-network_trainer", default="nnUNetPlusPlusTrainerV2") ++ parser.add_argument("-task", default="003", help="can be task name or task id") ++ parser.add_argument("-fold", default="0", help='0, 1, ..., 5 or \'all\'') ++ parser.add_argument("-val", "--validation_only", default=False, ++ help="use this if you want to only run the validation", ++ action="store_true") ++ parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis") ++ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", ++ action="store_true") ++ parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", ++ default=default_plans_identifier, required=False) ++ parser.add_argument("--use_compressed_data", default=False, action="store_true", ++ help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data " ++ "is much more CPU and RAM intensive and should only be used if you know what you are " ++ "doing", required=False) ++ parser.add_argument("--deterministic", ++ help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think " ++ "this is not necessary. Deterministic training will make you overfit to some random seed. " ++ "Don't use that.", ++ required=False, default=False, action="store_true") ++ parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will " ++ "export npz files of " ++ "predicted segmentations " ++ "in the validation as well. " ++ "This is needed to run the " ++ "ensembling step so unless " ++ "you are developing nnUNet " ++ "you should enable this") ++ parser.add_argument("--find_lr", required=False, default=False, action="store_true", ++ help="not used here, just for fun") ++ parser.add_argument("--valbest", required=False, default=False, action="store_true", ++ help="hands off. This is not intended to be used") ++ parser.add_argument("--fp32", required=False, default=False, action="store_true", ++ help="disable mixed precision training and run old school fp32") ++ parser.add_argument("--val_folder", required=False, default="validation_raw", ++ help="name of the validation folder. No need to use this for most people") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations. Testing purpose only. Hands off") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z if z is resampled separately. Testing purpose only. 
" ++ # "Hands off") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") ++ ++ args = parser.parse_args() ++ print('------------\n', args) ++ ++ task = args.task ++ fold = args.fold ++ network = args.network ++ network_trainer = args.network_trainer ++ weights = args.w ++ validation_only = args.validation_only ++ plans_identifier = args.p ++ find_lr = args.find_lr ++ ++ use_compressed_data = args.use_compressed_data ++ decompress_data = not use_compressed_data ++ ++ deterministic = args.deterministic ++ valbest = args.valbest ++ ++ fp32 = args.fp32 ++ run_mixed_precision = not fp32 ++ ++ val_folder = args.val_folder ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ ++ if not task.startswith("Task"): ++ task_id = int(task) ++ task = convert_id_to_task_name(task_id) ++ ++ if fold == 'all': ++ pass ++ else: ++ fold = int(fold) ++ ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ ++ trainer_class, domain = get_default_configuration(network, task, network_trainer, plans_identifier) ++ ++ if trainer_class is None: ++ raise RuntimeError("Could not find trainer class in nnunet.training.network_training") ++ ++ if network == "3d_cascade_fullres": ++ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ ++ "If running 3d_cascade_fullres then your " \ ++ "trainer class must be derived from " \ ++ "nnUNetTrainerCascadeFullRes" ++ else: ++ assert issubclass(trainer_class, ++ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" ++ ++ trainer = trainer_class(plans_file, fold, output_folder=output_folder_name, dataset_directory=dataset_directory, ++ batch_dice=batch_dice, stage=stage, unpack_data=decompress_data, ++ deterministic=deterministic, ++ fp16=run_mixed_precision) ++ ++ trainer.initialize(not validation_only) ++ ++ if weights != None: ++ trainer.load_pretrained_encoder_weights(weights) ++ sys.stdout.flush() ++ ++ if find_lr: ++ trainer.find_lr() ++ else: ++ if not validation_only: ++ if args.continue_training: ++ trainer.load_latest_checkpoint() ++ trainer.run_training() ++ else: ++ if valbest: ++ trainer.load_best_checkpoint(train=False) ++ else: ++ trainer.load_latest_checkpoint(train=False) ++ ++ trainer.network.eval() ++ ++ # predict validation ++ trainer.validate(save_softmax=args.npz, validation_folder_name=val_folder) ++ ++ if network == '3d_lowres': ++ trainer.load_best_checkpoint(False) ++ print("predicting segmentations for the next stage of the cascade") ++ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/run/run_training_DDP.py b/pytorch/nnunet/run/run_training_DDP.py +index 5ffcdcf..6ad3d5a 100644 +--- a/pytorch/nnunet/run/run_training_DDP.py ++++ b/pytorch/nnunet/run/run_training_DDP.py +@@ -27,13 +27,13 @@ from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name + + def main(): + parser = argparse.ArgumentParser() +- 
parser.add_argument("network") +- parser.add_argument("network_trainer") +- parser.add_argument("task", help="can be task name or task id") +- parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') ++ parser.add_argument("network", default='3d_fullres') ++ parser.add_argument("network_trainer", default='nnUNetTrainerV2_DDP') ++ parser.add_argument("task", help="can be task name or task id", default='003') ++ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'', default='0') + parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", +- action="store_true") +- parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", ++ action="store_true", default=False) ++ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", + action="store_true") + parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", + default=default_plans_identifier, required=False) +@@ -78,7 +78,7 @@ def main(): + # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") + + args = parser.parse_args() +- ++ print('\n\n args=', args, '\n\n') + task = args.task + fold = args.fold + network = args.network +@@ -115,7 +115,7 @@ def main(): + # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) + + plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ +- trainer_class = get_default_configuration(network, task, network_trainer, plans_identifier) ++ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) + + if trainer_class is None: + raise RuntimeError("Could not find trainer class in meddec.model_training") +diff --git a/pytorch/nnunet/run/run_training_hypDDP.py b/pytorch/nnunet/run/run_training_hypDDP.py +new file mode 100644 +index 0000000..f50744c +--- /dev/null ++++ b/pytorch/nnunet/run/run_training_hypDDP.py +@@ -0,0 +1,164 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
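One caveat about the defaults added to the positional arguments of run_training_DDP.py above: argparse only applies a default to a positional argument when nargs='?' is also given; with the plain form used in the hunk, the value is still required on the command line. A small, self-contained illustration:

    import argparse

    parser = argparse.ArgumentParser()
    # Without nargs='?' the positional argument stays mandatory even if a
    # default is supplied; nargs='?' makes the default effective.
    parser.add_argument('network', nargs='?', default='3d_fullres')
    print(parser.parse_args([]).network)  # prints: 3d_fullres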
++
++
++import argparse
++
++from batchgenerators.utilities.file_and_folder_operations import *
++from nnunet.run.default_configuration import get_default_configuration
++from nnunet.paths import default_plans_identifier
++from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage
++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer
++from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes
++from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes
++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name
++
++
++def main():
++    parser = argparse.ArgumentParser()
++    parser.add_argument("network")
++    parser.add_argument("network_trainer")
++    parser.add_argument("task", help="can be task name or task id")
++    parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'')
++    parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation",
++                        action="store_true")
++    parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training",
++                        action="store_true")
++    parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner",
++                        default=default_plans_identifier, required=False)
++    parser.add_argument("--use_compressed_data", default=False, action="store_true",
++                        help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data "
++                             "is much more CPU and RAM intensive and should only be used if you know what you are "
++                             "doing", required=False)
++    parser.add_argument("--deterministic",
++                        help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think "
++                             "this is not necessary. Deterministic training will make you overfit to some random seed. "
++                             "Don't use that.",
++                        required=False, default=False, action="store_true")
++    parser.add_argument("--local_rank", default=0, type=int)
++    parser.add_argument("--fp32", required=False, default=False, action="store_true",
++                        help="disable mixed precision training and run old school fp32")
++    parser.add_argument("--dbs", required=False, default=False, action="store_true", help="distribute batch size. If "
++                                                                                          "True then whatever "
++                                                                                          "batch_size is in plans will "
++                                                                                          "be distributed over DDP "
++                                                                                          "models, if False then each "
++                                                                                          "model will have batch_size "
++                                                                                          "for a total of "
++                                                                                          "GPUs*batch_size")
++    parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will "
++                                                                                          "export npz files of "
++                                                                                          "predicted segmentations "
++                                                                                          "in the validation as well. "
++                                                                                          "This is needed to run the "
++                                                                                          "ensembling step so unless "
++                                                                                          "you are developing nnUNet "
++                                                                                          "you should enable this")
++    parser.add_argument("--valbest", required=False, default=False, action="store_true", help="")
++    parser.add_argument("--find_lr", required=False, default=False, action="store_true", help="")
++    parser.add_argument("--val_folder", required=False, default="validation_raw",
++                        help="name of the validation folder. No need to use this for most people")
++    # parser.add_argument("--interp_order", required=False, default=3, type=int,
++    #                     help="order of interpolation for segmentations. Testing purpose only. Hands off")
++    # parser.add_argument("--interp_order_z", required=False, default=0, type=int,
++    #                     help="order of interpolation along z if z is resampled separately. Testing purpose only. 
" ++ # "Hands off") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") ++ ++ args = parser.parse_args() ++ print('\n\n args=', args, '\n\n') ++ task = args.task ++ fold = args.fold ++ network = args.network ++ network_trainer = args.network_trainer ++ validation_only = args.validation_only ++ plans_identifier = args.p ++ use_compressed_data = args.use_compressed_data ++ decompress_data = not use_compressed_data ++ deterministic = args.deterministic ++ valbest = args.valbest ++ find_lr = args.find_lr ++ val_folder = args.val_folder ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ fp32 = args.fp32 ++ ++ if not task.startswith("Task"): ++ task_id = int(task) ++ task = convert_id_to_task_name(task_id) ++ ++ if fold == 'all': ++ pass ++ else: ++ fold = int(fold) ++ # ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ ++ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) ++ ++ if trainer_class is None: ++ raise RuntimeError("Could not find trainer class in meddec.model_training") ++ ++ if network == "3d_cascade_fullres": ++ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ ++ "If running 3d_cascade_fullres then your " \ ++ "trainer class must be derived from " \ ++ "nnUNetTrainerCascadeFullRes" ++ else: ++ assert issubclass(trainer_class, ++ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" ++ ++ trainer = trainer_class(plans_file, fold, local_rank=args.local_rank, output_folder=output_folder_name, ++ dataset_directory=dataset_directory, batch_dice=batch_dice, stage=stage, ++ unpack_data=decompress_data, deterministic=deterministic, fp16=not fp32, ++ distribute_batch_size=args.dbs) ++ ++ trainer.initialize(not validation_only) ++ ++ if find_lr: ++ trainer.find_lr() ++ else: ++ if not validation_only: ++ if args.continue_training: ++ trainer.load_latest_checkpoint() ++ trainer.run_training() ++ else: ++ if valbest: ++ trainer.load_best_checkpoint(train=False) ++ else: ++ trainer.load_latest_checkpoint(train=False) ++ ++ trainer.network.eval() ++ ++ # predict validation ++ trainer.validate(save_softmax=args.npz, validation_folder_name=val_folder) ++ ++ if network == '3d_lowres': ++ trainer.load_best_checkpoint(False) ++ print("predicting segmentations for the next stage of the cascade") ++ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/training/loss_functions/crossentropy.py b/pytorch/nnunet/training/loss_functions/crossentropy.py +index 6195437..0c782d9 100644 +--- a/pytorch/nnunet/training/loss_functions/crossentropy.py ++++ b/pytorch/nnunet/training/loss_functions/crossentropy.py +@@ -6,6 +6,15 @@ class RobustCrossEntropyLoss(nn.CrossEntropyLoss): + this is just a compatibility layer because my target tensor is float and has an extra dimension + """ + def forward(self, input: Tensor, target: 
Tensor) -> Tensor:
++        # i = 0
++        # print('----------')
++        # print('input:', input.shape)
++        # for i in range(len(input)):
++        #     print(i, input[i].shape)
++        # print('target')
++        # for i in range(len(target)):
++        #     print(i, target[i].shape)
++        # print('\n----------')
+         if len(target.shape) == len(input.shape):
+             assert target.shape[1] == 1
+             target = target[:, 0]
+diff --git a/pytorch/nnunet/training/network_training/network_trainer.py b/pytorch/nnunet/training/network_training/network_trainer.py
+index e920158..f0031d3 100644
+--- a/pytorch/nnunet/training/network_training/network_trainer.py
++++ b/pytorch/nnunet/training/network_training/network_trainer.py
+@@ -37,6 +37,7 @@ from abc import abstractmethod
+ from datetime import datetime
+ from tqdm import trange
+ from nnunet.utilities.to_torch import maybe_to_torch, to_cuda
++import pdb
+ 
+ 
+ class NetworkTrainer(object):
+@@ -438,7 +439,8 @@ class NetworkTrainer(object):
+         self._maybe_init_amp()
+ 
+     def _maybe_init_amp(self):
+-        if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available():
++        # if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available():
++        if self.fp16 and self.amp_grad_scaler is None:
+             self.amp_grad_scaler = GradScaler()
+ 
+     def plot_network_architecture(self):
+diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py
+index e9aa611..9b97e8c 100644
+--- a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py
++++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py
+@@ -13,6 +13,7 @@
+ # limitations under the License.
+ 
+ 
++import SimpleITK as sitk
+ from collections import OrderedDict
+ from typing import Tuple
+ import sys
+@@ -35,12 +36,10 @@ from torch.cuda.amp import autocast
+ from nnunet.training.learning_rate.poly_lr import poly_lr
+ from batchgenerators.utilities.file_and_folder_operations import *
+ 
+-
+ class nnUNetPlusPlusTrainerV2(nnUNetTrainer):
+     """
+     Info for Fabian: same as internal nnUNetTrainerV2_2
+     """
+-
+     def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True, stage=None,
+                  unpack_data=True, deterministic=True, fp16=False):
+         super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data,
+@@ -66,7 +65,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer):
+             maybe_mkdir_p(self.output_folder)
+ 
+             if force_load_plans or (self.plans is None):
+-                self.load_plans_file()
++                self.load_plans_file()  # '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl'
+ 
+             self.process_plans(self.plans)
+ 
+@@ -189,6 +188,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer):
+         """
+         ds = self.network.do_ds
+         self.network.do_ds = False
++        overwrite = False  # do not rerun the inference here, it would take too long
+         ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian,
+                                overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs)
+ 
+@@ -200,16 +200,18 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer):
+                                                          use_sliding_window: bool = True, step_size: float = 0.5,
+                                                          use_gaussian: bool = True, pad_border_mode: str = 'constant',
+                                                          pad_kwargs: dict = None, all_in_gpu: bool = True,
+-                                                         verbose: bool = True, mixed_precision=True) -> Tuple[np.ndarray, np.ndarray]:
++                                                         verbose: bool = True, mixed_precision=True, img_name=None,
++                                                         pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]:
+         """
+         We need to wrap this because we need to enforce 
self.network.do_ds = False for prediction + """ +- ds = self.network.do_ds ++ ds = self.network.do_ds # ds = True + self.network.do_ds = False + ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, + use_sliding_window, step_size, use_gaussian, + pad_border_mode, pad_kwargs, all_in_gpu, verbose, +- mixed_precision=mixed_precision) ++ mixed_precision=mixed_precision, img_name=img_name, ++ pre_mode=pre_mode, fp=fp) + self.network.do_ds = ds + return ret + +@@ -225,7 +227,20 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + data_dict = next(data_generator) + data = data_dict['data'] + target = data_dict['target'] +- ++ # i = 0 ++ # while True: ++ # i += 1 ++ # data_dict = next(data_generator) ++ # data = data_dict['data'] ++ # target = data_dict['target'] ++ # data_numpy_output = '/home/yupeng/save_data.nii.gz' ++ # data_numpy = data[0][0].numpy() ++ # target_numpy = target[0][0][0].numpy() ++ # data_1 = data_numpy.flatten() ++ # minm = np.argmin(data_1) ++ # maxm = np.argmax(data_1) ++ # out = sitk.GetImageFromArray(data_numpy) ++ # sitk.WriteImage(out, data_numpy_output) + data = maybe_to_torch(data) + target = maybe_to_torch(target) + +@@ -234,7 +249,6 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + target = to_cuda(target) + + self.optimizer.zero_grad() +- + if self.fp16: + with autocast(): + output = self.network(data) +@@ -261,7 +275,6 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + self.run_online_evaluation(output, target) + + del target +- + return l.detach().cpu().numpy() + + def do_split(self): +diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py +new file mode 100644 +index 0000000..e2ab2fa +--- /dev/null ++++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py +@@ -0,0 +1,483 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
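The DDP trainer added below seeds every worker with its local_rank, binds one device per process and joins an environment-initialised NCCL group. A condensed sketch of that setup pattern, assuming one process per GPU launched via torch.distributed:

    import numpy as np
    import torch
    import torch.distributed as dist

    def setup_ddp_worker(local_rank: int) -> None:
        # Deterministic per-rank seeding, as in nnUNetPlusPlusTrainerV2_DDP.__init__
        np.random.seed(local_rank)
        torch.manual_seed(local_rank)
        if torch.cuda.is_available():
            torch.cuda.set_device(local_rank)
            # init_method='env://' reads MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE
            dist.init_process_group(backend='nccl', init_method='env://')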
++ ++ ++from collections import OrderedDict ++from typing import Tuple ++import sys ++import time ++import numpy as np ++import torch ++import torch.distributed as dist ++from torch.cuda.amp import autocast ++from torch.nn.parallel import DistributedDataParallel as DDP ++from nnunet.training.loss_functions.deep_supervision import MultipleOutputLoss2 ++from nnunet.utilities.to_torch import maybe_to_torch, to_cuda ++from nnunet.training.data_augmentation.default_data_augmentation import get_moreDA_augmentation ++from nnunet.network_architecture.generic_UNetPlusPlus import Generic_UNetPlusPlus ++from nnunet.network_architecture.initialization import InitWeights_He ++from nnunet.network_architecture.neural_network import SegmentationNetwork ++from nnunet.training.data_augmentation.default_data_augmentation import default_2D_augmentation_params, \ ++ get_patch_size, default_3D_augmentation_params ++from nnunet.training.dataloading.dataset_loading import unpack_dataset ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.utilities.nd_softmax import softmax_helper ++from sklearn.model_selection import KFold ++from torch import nn ++from torch.cuda.amp import autocast ++from nnunet.training.learning_rate.poly_lr import poly_lr ++from batchgenerators.utilities.file_and_folder_operations import * ++ ++ ++class nnUNetPlusPlusTrainerV2_DDP(nnUNetTrainer): ++ """ ++ Info for Fabian: same as internal nnUNetTrainerV2_2 ++ """ ++ ++ def __init__(self, plans_file, fold, local_rank, output_folder=None, dataset_directory=None, batch_dice=True, ++ stage=None, ++ unpack_data=True, deterministic=True, fp16=False, distribute_batch_size=1): ++ super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data, ++ deterministic, fp16) ++ self.init_args = ( ++ plans_file, fold, local_rank, output_folder, dataset_directory, batch_dice, stage, unpack_data, ++ deterministic, distribute_batch_size, fp16) ++ self.max_num_epochs = 1000 ++ self.initial_lr = 1e-2 ++ self.deep_supervision_scales = None ++ self.ds_loss_weights = None ++ self.distribute_batch_size = distribute_batch_size ++ np.random.seed(local_rank) ++ torch.manual_seed(local_rank) ++ self.local_rank = local_rank ++ if torch.cuda.is_available(): ++ torch.cuda.set_device(local_rank) ++ dist.init_process_group(backend='nccl', init_method='env://') ++ ++ self.pin_memory = True ++ ++ def initialize(self, training=True, force_load_plans=False): ++ """ ++ - replaced get_default_augmentation with get_moreDA_augmentation ++ - enforce to only run this code once ++ - loss function wrapper for deep supervision ++ ++ :param training: ++ :param force_load_plans: ++ :return: ++ """ ++ if not self.was_initialized: ++ maybe_mkdir_p(self.output_folder) ++ ++ if force_load_plans or (self.plans is None): ++ self.load_plans_file() ++ ++ self.process_plans(self.plans) ++ ++ self.setup_DA_params() ++ ++ ################# Here we wrap the loss for deep supervision ############ ++ # we need to know the number of outputs of the network ++ net_numpool = len(self.net_num_pool_op_kernel_sizes) ++ ++ # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases ++ # this gives higher resolution outputs more weight in the loss ++ weights = np.array([1 / (2 ** i) for i in range(net_numpool)]) ++ ++ # we don't use the lowest 2 outputs. 
Normalize weights so that they sum to 1
++ mask = np.array([True] + [True if i < net_numpool - 1 else False for i in range(1, net_numpool)])
++ weights[~mask] = 0
++ weights = weights / weights.sum()
++ # self.ds_loss_weights = weights
++ self.ds_loss_weights = None
++ # now wrap the loss
++ self.loss = MultipleOutputLoss2(self.loss, self.ds_loss_weights)
++ ################# END ###################
++
++ self.folder_with_preprocessed_data = join(self.dataset_directory, self.plans['data_identifier'] +
++ "_stage%d" % self.stage)
++ if training:
++ self.dl_tr, self.dl_val = self.get_basic_generators()
++ if self.unpack_data:
++ if self.local_rank == 0:
++ print("unpacking dataset")
++ unpack_dataset(self.folder_with_preprocessed_data)
++ print("done")
++ else:
++ # we need to wait until worker 0 has finished unpacking
++ npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False)
++ case_ids = [i[:-4] for i in npz_files]
++ all_present = all(
++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
++ while not all_present:
++ print("worker", self.local_rank, "is waiting for unpacking")
++ time.sleep(3)
++ all_present = all(
++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
++ # there is some slight chance that there may arise some error because dataloaders are loading a file
++ # that is still being written by worker 0. We ignore this for now and address it only if it becomes
++ # relevant
++ # (this can occur because while worker 0 writes the file is technically present so the other workers
++ # will proceed and eventually try to read it)
++ else:
++ print(
++ "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
++ "will wait all winter for your model to finish!")
++
++ self.tr_gen, self.val_gen = get_moreDA_augmentation(
++ self.dl_tr, self.dl_val,
++ self.data_aug_params[
++ 'patch_size_for_spatialtransform'],
++ self.data_aug_params,
++ deep_supervision_scales=self.deep_supervision_scales,
++ pin_memory=self.pin_memory
++ )
++ self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())),
++ also_print_to_console=False)
++ self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())),
++ also_print_to_console=False)
++ else:
++ pass
++
++ self.initialize_network()
++ self.initialize_optimizer_and_scheduler()
++
++ assert isinstance(self.network, (SegmentationNetwork, DDP))
++ else:
++ self.print_to_log_file('self.was_initialized is True, not running self.initialize again')
++ self.was_initialized = True
++
++ def initialize_network(self):
++ """
++ - momentum 0.99
++ - SGD instead of Adam
++ - self.lr_scheduler = None because we do poly_lr
++ - deep supervision = True
++ - I am sure I forgot something here
++
++ Known issue: forgot to set neg_slope=0 in InitWeights_He; should not make a difference though
++ :return:
++ """
++ if self.threeD:
++ conv_op = nn.Conv3d
++ dropout_op = nn.Dropout3d
++ norm_op = nn.InstanceNorm3d
++
++ else:
++ conv_op = nn.Conv2d
++ dropout_op = nn.Dropout2d
++ norm_op = nn.InstanceNorm2d
++ norm_op_kwargs = {'eps': 1e-5, 'affine': True}
++ dropout_op_kwargs = {'p': 0, 'inplace': True}
++ net_nonlin = nn.LeakyReLU
++ net_nonlin_kwargs = {'negative_slope': 1e-2, 'inplace': True}
++ self.network = Generic_UNetPlusPlus(self.num_input_channels, self.base_num_features, self.num_classes,
++ len(self.net_num_pool_op_kernel_sizes),
++ self.conv_per_stage, 2, conv_op, norm_op, norm_op_kwargs, dropout_op,
++ 
dropout_op_kwargs, ++ net_nonlin, net_nonlin_kwargs, True, False, lambda x: x, ++ InitWeights_He(1e-2), ++ self.net_num_pool_op_kernel_sizes, self.net_conv_kernel_sizes, False, True, ++ True) ++ if torch.cuda.is_available(): ++ self.network.cuda() ++ self.network.inference_apply_nonlin = softmax_helper ++ self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) ++ ++ # self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) ++ ++ def initialize_optimizer_and_scheduler(self): ++ assert self.network is not None, "self.initialize_network must be called first" ++ print('weight_decay: ', self.weight_decay) ++ sys.stdout.flush() ++ self.optimizer = torch.optim.SGD(self.network.parameters(), self.initial_lr, weight_decay=self.weight_decay, ++ momentum=0.99, nesterov=True) ++ self.lr_scheduler = None ++ ++ def run_online_evaluation(self, output, target): ++ """ ++ due to deep supervision the return value and the reference are now lists of tensors. We only need the full ++ resolution output because this is what we are interested in in the end. The others are ignored ++ :param output: ++ :param target: ++ :return: ++ """ ++ target = target[0] ++ output = output[0] ++ return super().run_online_evaluation(output, target) ++ ++ def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, ++ step_size: float = 0.5, save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True, ++ validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False, ++ segmentation_export_kwargs: dict = None): ++ """ ++ We need to wrap this because we need to enforce self.network.do_ds = False for prediction ++ """ ++ if self.local_rank == 0: ++ if isinstance(self.network, DDP): ++ net = self.network.module ++ else: ++ net = self.network ++ ds = self.network.do_ds ++ net.do_ds = False ++ ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian, ++ overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs) ++ ++ net.do_ds = ds ++ return ret ++ ++ def predict_preprocessed_data_return_seg_and_softmax(self, data: np.ndarray, do_mirroring: bool = True, ++ mirror_axes: Tuple[int] = None, ++ use_sliding_window: bool = True, step_size: float = 0.5, ++ use_gaussian: bool = True, pad_border_mode: str = 'constant', ++ pad_kwargs: dict = None, all_in_gpu: bool = True, ++ verbose: bool = True, mixed_precision=True) -> Tuple[ ++ np.ndarray, np.ndarray]: ++ """ ++ We need to wrap this because we need to enforce self.network.do_ds = False for prediction ++ """ ++ ds = self.network.do_ds ++ self.network.do_ds = False ++ ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, ++ use_sliding_window, step_size, use_gaussian, ++ pad_border_mode, pad_kwargs, all_in_gpu, verbose, ++ mixed_precision=mixed_precision) ++ self.network.do_ds = ds ++ return ret ++ ++ def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False): ++ """ ++ gradient clipping improves training stability ++ ++ :param data_generator: ++ :param do_backprop: ++ :param run_online_evaluation: ++ :return: ++ """ ++ data_dict = next(data_generator) ++ data = data_dict['data'] ++ target = data_dict['target'] ++ ++ data = maybe_to_torch(data) ++ target = maybe_to_torch(target) ++ ++ if torch.cuda.is_available(): ++ data = to_cuda(data, gpu_id=None) ++ target = to_cuda(target, gpu_id=None) ++ ++ self.optimizer.zero_grad() ++ ++ 
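++ # mixed-precision path: autocast runs the forward pass in fp16, GradScaler
++ # scales the loss for backward and unscales before the gradient-norm clip;
++ # the else branch below is the identical fp32 version of the same update.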
if self.fp16:
++ with autocast():
++ output = self.network(data)
++ del data
++ l = self.loss(output, target)
++
++ if do_backprop:
++ self.amp_grad_scaler.scale(l).backward()
++ self.amp_grad_scaler.unscale_(self.optimizer)
++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12)
++ self.amp_grad_scaler.step(self.optimizer)
++ self.amp_grad_scaler.update()
++ else:
++ output = self.network(data)
++ del data
++ l = self.loss(output, target)
++
++ if do_backprop:
++ l.backward()
++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12)
++ self.optimizer.step()
++
++ if run_online_evaluation:
++ self.run_online_evaluation(output, target)
++
++ del target
++
++ return l.detach().cpu().numpy()
++
++ def do_split(self):
++ """
++ we now allow more than 5 splits. IMPORTANT: any fold > 4 will not be a real split but just another random
++ 80:20 split of the data. You cannot run X-fold cross-validation with this code. It will always be a 5-fold CV.
++ Folds > 4 will be independent from each other
++ :return:
++ """
++ if self.fold == "all":
++ # if fold==all then we use all images for training and validation
++ tr_keys = val_keys = list(self.dataset.keys())
++ else:
++ splits_file = join(self.dataset_directory, "splits_final.pkl")
++
++ # if the split file does not exist we need to create it
++ if not isfile(splits_file):
++ self.print_to_log_file("Creating new split...")
++ splits = []
++ all_keys_sorted = np.sort(list(self.dataset.keys()))
++ kfold = KFold(n_splits=5, shuffle=True, random_state=12345)
++ for i, (train_idx, test_idx) in enumerate(kfold.split(all_keys_sorted)):
++ train_keys = np.array(all_keys_sorted)[train_idx]
++ test_keys = np.array(all_keys_sorted)[test_idx]
++ splits.append(OrderedDict())
++ splits[-1]['train'] = train_keys
++ splits[-1]['val'] = test_keys
++ save_pickle(splits, splits_file)
++
++ splits = load_pickle(splits_file)
++
++ if self.fold < len(splits):
++ tr_keys = splits[self.fold]['train']
++ val_keys = splits[self.fold]['val']
++ else:
++ self.print_to_log_file("INFO: Requested fold %d but split file only has %d folds. I am now creating a "
++ "random 80:20 split!" % (self.fold, len(splits)))
++ # if we request a fold that is not in the split file, create a random 80:20 split
++ rnd = np.random.RandomState(seed=12345 + self.fold)
++ keys = np.sort(list(self.dataset.keys()))
++ idx_tr = rnd.choice(len(keys), int(len(keys) * 0.8), replace=False)
++ idx_val = [i for i in range(len(keys)) if i not in idx_tr]
++ tr_keys = [keys[i] for i in idx_tr]
++ val_keys = [keys[i] for i in idx_val]
++
++ tr_keys.sort()
++ val_keys.sort()
++ self.dataset_tr = OrderedDict()
++ for i in tr_keys:
++ self.dataset_tr[i] = self.dataset[i]
++ self.dataset_val = OrderedDict()
++ for i in val_keys:
++ self.dataset_val[i] = self.dataset[i]
++
++ def setup_DA_params(self):
++ """
++ - we increase rotation angle from [-15, 15] to [-30, 30]
++ - scale range is now (0.7, 1.4), was (0.85, 1.25)
++ - we don't do elastic deformation anymore
++
++ :return:
++ """
++
++ self.deep_supervision_scales = [[1, 1, 1]] + list(list(i) for i in 1 / np.cumprod(
++ np.vstack(self.net_num_pool_op_kernel_sizes), axis=0))[:-1]
++
++ if self.threeD:
++ self.data_aug_params = default_3D_augmentation_params
++ self.data_aug_params['rotation_x'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi)
++ self.data_aug_params['rotation_y'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi)
++ self.data_aug_params['rotation_z'] = (-30. / 360 * 2. 
* np.pi)
++ if self.do_dummy_2D_aug:
++ self.data_aug_params["dummy_2D"] = True
++ self.print_to_log_file("Using dummy2d data augmentation")
++ self.data_aug_params["elastic_deform_alpha"] = \
++ default_2D_augmentation_params["elastic_deform_alpha"]
++ self.data_aug_params["elastic_deform_sigma"] = \
++ default_2D_augmentation_params["elastic_deform_sigma"]
++ self.data_aug_params["rotation_x"] = default_2D_augmentation_params["rotation_x"]
++ else:
++ self.do_dummy_2D_aug = False
++ if max(self.patch_size) / min(self.patch_size) > 1.5:
++ default_2D_augmentation_params['rotation_x'] = (-15. / 360 * 2. * np.pi, 15. / 360 * 2. * np.pi)
++ self.data_aug_params = default_2D_augmentation_params
++ self.data_aug_params["mask_was_used_for_normalization"] = self.use_mask_for_norm
++
++ if self.do_dummy_2D_aug:
++ self.basic_generator_patch_size = get_patch_size(self.patch_size[1:],
++ self.data_aug_params['rotation_x'],
++ self.data_aug_params['rotation_y'],
++ self.data_aug_params['rotation_z'],
++ self.data_aug_params['scale_range'])
++ self.basic_generator_patch_size = np.array([self.patch_size[0]] + list(self.basic_generator_patch_size))
++ patch_size_for_spatialtransform = self.patch_size[1:]
++ else:
++ self.basic_generator_patch_size = get_patch_size(self.patch_size, self.data_aug_params['rotation_x'],
++ self.data_aug_params['rotation_y'],
++ self.data_aug_params['rotation_z'],
++ self.data_aug_params['scale_range'])
++ patch_size_for_spatialtransform = self.patch_size
++
++ self.data_aug_params["scale_range"] = (0.7, 1.4)
++ self.data_aug_params["do_elastic"] = False
++ self.data_aug_params['selected_seg_channels'] = [0]
++ self.data_aug_params['patch_size_for_spatialtransform'] = patch_size_for_spatialtransform
++
++ self.data_aug_params["num_cached_per_thread"] = 2
++
++ def maybe_update_lr(self, epoch=None):
++ """
++ if epoch is not None we overwrite epoch. Else we use epoch = self.epoch + 1
++
++ (maybe_update_lr is called in on_epoch_end which is called before epoch is incremented.
++ Therefore we need to do +1 here)
++
++ :param epoch:
++ :return:
++ """
++ if epoch is None:
++ ep = self.epoch + 1
++ else:
++ ep = epoch
++ self.optimizer.param_groups[0]['lr'] = poly_lr(ep, self.max_num_epochs, self.initial_lr, 0.9)
++ self.print_to_log_file("lr:", np.round(self.optimizer.param_groups[0]['lr'], decimals=6))
++
++ def on_epoch_end(self):
++ """
++ overwrite patient-based early stopping. Always run to 1000 epochs
++ :return:
++ """
++ super().on_epoch_end()
++ continue_training = self.epoch < self.max_num_epochs
++
++ # it can rarely happen that the momentum of nnUNetTrainerV2_plus is too high for some dataset. If at epoch 100 the
++ # estimated validation Dice is still 0 then we reduce the momentum from 0.99 to 0.95
++ if self.epoch == 100:
++ if self.all_val_eval_metrics[-1] == 0:
++ self.optimizer.param_groups[0]["momentum"] = 0.95
++ self.network.apply(InitWeights_He(1e-2))
++ self.print_to_log_file("At epoch 100, the mean foreground Dice was 0. This can be caused by a too "
++ "high momentum. High momentum (0.99) is good for datasets where it works, but "
++ "sometimes causes issues such as this one. 
Momentum has now been reduced to " ++ "0.95 and network weights have been reinitialized") ++ return continue_training ++ ++ def save_checkpoint(self, fname, save_optimizer=True): ++ if self.local_rank == 0: ++ super().save_checkpoint(fname, save_optimizer) ++ ++ def plot_progress(self): ++ if self.local_rank == 0: ++ super().plot_progress() ++ ++ def print_to_log_file(self, *args, also_print_to_console=True): ++ if self.local_rank == 0: ++ super().print_to_log_file(*args, also_print_to_console=also_print_to_console) ++ ++ def run_training(self): ++ """ ++ if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first ++ continued epoch with self.initial_lr ++ ++ we also need to make sure deep supervision in the network is enabled for training, thus the wrapper ++ :return: ++ """ ++ self.maybe_update_lr(self.epoch) # if we dont overwrite epoch then self.epoch+1 is used which is not what we ++ # want at the start of the training ++ if isinstance(self.network, DDP): ++ net = self.network.module ++ else: ++ net = self.network ++ ds = net.do_ds ++ net.do_ds = True ++ ret = super().run_training() ++ net.do_ds = ds ++ return ret +diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_hypDDP.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_hypDDP.py +new file mode 100644 +index 0000000..aab27fe +--- /dev/null ++++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_hypDDP.py +@@ -0,0 +1,457 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
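++# DDP trainer built on nnUNetPlusPlusTrainerV2 that additionally splits the
++# global batch across ranks (set_batch_size_and_oversample) and all-gathers
++# the tp/fp/fn dice statistics from every GPU before computing the batch-dice
++# loss and the online evaluation metrics.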
++ ++from collections import OrderedDict ++from time import sleep ++from typing import Tuple ++ ++import numpy as np ++import torch ++import torch.distributed as dist ++from torch.cuda.amp import autocast ++from torch.nn.parallel import DistributedDataParallel as DDP ++from batchgenerators.utilities.file_and_folder_operations import maybe_mkdir_p, join, subfiles, isfile ++from nnunet.network_architecture.neural_network import SegmentationNetwork ++from nnunet.training.data_augmentation.default_data_augmentation import get_moreDA_augmentation ++from nnunet.training.dataloading.dataset_loading import unpack_dataset ++from nnunet.training.loss_functions.crossentropy import RobustCrossEntropyLoss ++from nnunet.training.loss_functions.dice_loss import get_tp_fp_fn_tn ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.training.network_training.nnUNetPlusPlusTrainerV2 import nnUNetPlusPlusTrainerV2 ++from nnunet.utilities.distributed import awesome_allgather_function ++from nnunet.utilities.nd_softmax import softmax_helper ++from nnunet.utilities.tensor_utilities import sum_tensor ++from nnunet.utilities.to_torch import to_cuda, maybe_to_torch ++from torch import nn ++from torch.nn.utils import clip_grad_norm_ ++from torch.optim.lr_scheduler import _LRScheduler ++ ++ ++class nnUNetPlusPlusTrainerV2_hypDDP(nnUNetPlusPlusTrainerV2): ++ def __init__(self, plans_file, fold, local_rank, output_folder=None, dataset_directory=None, batch_dice=True, ++ stage=None, ++ unpack_data=True, deterministic=True, distribute_batch_size=False, fp16=False): ++ super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, ++ unpack_data, deterministic, fp16) ++ self.init_args = ( ++ plans_file, fold, local_rank, output_folder, dataset_directory, batch_dice, stage, unpack_data, ++ deterministic, distribute_batch_size, fp16) ++ self.distribute_batch_size = distribute_batch_size ++ np.random.seed(local_rank) ++ torch.manual_seed(local_rank) ++ if torch.cuda.is_available(): ++ torch.cuda.manual_seed_all(local_rank) ++ self.local_rank = local_rank ++ ++ if torch.cuda.is_available(): ++ torch.cuda.set_device(local_rank) ++ dist.init_process_group(backend='nccl', init_method='env://') ++ ++ self.val_loss_ma_alpha = 0.95 ++ self.val_loss_MA = None ++ ++ self.loss = None ++ self.ce_loss = RobustCrossEntropyLoss() ++ ++ self.global_batch_size = None # we need to know this to properly steer oversample ++ ++ def set_batch_size_and_oversample(self): ++ batch_sizes = [] ++ oversample_percents = [] ++ ++ world_size = dist.get_world_size() ++ my_rank = dist.get_rank() ++ ++ if self.distribute_batch_size: ++ self.global_batch_size = self.batch_size ++ else: ++ self.global_batch_size = self.batch_size * world_size ++ ++ batch_size_per_GPU = np.ceil(self.batch_size / world_size).astype(int) ++ ++ for rank in range(world_size): ++ if self.distribute_batch_size: ++ if (rank + 1) * batch_size_per_GPU > self.batch_size: ++ batch_size = batch_size_per_GPU - ((rank + 1) * batch_size_per_GPU - self.batch_size) ++ else: ++ batch_size = batch_size_per_GPU ++ else: ++ batch_size = self.batch_size ++ ++ batch_sizes.append(batch_size) ++ ++ sample_id_low = 0 if len(batch_sizes) == 0 else np.sum(batch_sizes[:-1]) ++ sample_id_high = np.sum(batch_sizes) ++ ++ if sample_id_high / self.global_batch_size < (1 - self.oversample_foreground_percent): ++ oversample_percents.append(0.0) ++ elif sample_id_low / self.global_batch_size > (1 - self.oversample_foreground_percent): ++ 
oversample_percents.append(1.0)
++ else:
++ percent_covered_by_this_rank = sample_id_high / self.global_batch_size - sample_id_low / self.global_batch_size
++ oversample_percent_here = 1 - (((1 - self.oversample_foreground_percent) -
++ sample_id_low / self.global_batch_size) / percent_covered_by_this_rank)
++ oversample_percents.append(oversample_percent_here)
++
++ print("worker", my_rank, "oversample", oversample_percents[my_rank])
++ print("worker", my_rank, "batch_size", batch_sizes[my_rank])
++
++ self.batch_size = batch_sizes[my_rank]
++ self.oversample_foreground_percent = oversample_percents[my_rank]
++
++ def save_checkpoint(self, fname, save_optimizer=True):
++ if self.local_rank == 0:
++ super().save_checkpoint(fname, save_optimizer)
++
++ def plot_progress(self):
++ if self.local_rank == 0:
++ super().plot_progress()
++
++ def print_to_log_file(self, *args, also_print_to_console=True):
++ if self.local_rank == 0:
++ super().print_to_log_file(*args, also_print_to_console=also_print_to_console)
++
++ def process_plans(self, plans):
++ super().process_plans(plans)
++ self.set_batch_size_and_oversample()
++
++ def initialize(self, training=True, force_load_plans=False):
++ """
++ For prediction of test cases just set training=False, this will prevent loading of training data and
++ training batchgenerator initialization
++ :param training:
++ :return:
++ """
++ if not self.was_initialized:
++ maybe_mkdir_p(self.output_folder)
++
++ if force_load_plans or (self.plans is None):
++ self.load_plans_file()
++
++ self.process_plans(self.plans)
++
++ self.setup_DA_params()
++
++ self.folder_with_preprocessed_data = join(self.dataset_directory, self.plans['data_identifier'] +
++ "_stage%d" % self.stage)
++ if training:
++ self.dl_tr, self.dl_val = self.get_basic_generators()
++ if self.unpack_data:
++ if self.local_rank == 0:
++ print("unpacking dataset")
++ unpack_dataset(self.folder_with_preprocessed_data)
++ print("done")
++ else:
++ # we need to wait until worker 0 has finished unpacking
++ npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False)
++ case_ids = [i[:-4] for i in npz_files]
++ all_present = all(
++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
++ while not all_present:
++ print("worker", self.local_rank, "is waiting for unpacking")
++ sleep(3)
++ all_present = all(
++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids])
++ # there is some slight chance that there may arise some error because dataloaders are loading a file
++ # that is still being written by worker 0. We ignore this for now and address it only if it becomes
++ # relevant
++ # (this can occur because while worker 0 writes the file is technically present so the other workers
++ # will proceed and eventually try to read it)
++ else:
++ print(
++ "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you "
++ "will wait all winter for your model to finish!")
++
++ # setting weights for deep supervision losses
++ net_numpool = len(self.net_num_pool_op_kernel_sizes)
++
++ # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases
++ # this gives higher resolution outputs more weight in the loss
++ weights = np.array([1 / (2 ** i) for i in range(net_numpool)])
++
++ # we don't use the lowest 2 outputs. 
Normalize weights so that they sum to 1 ++ mask = np.array([True if i < net_numpool - 1 else False for i in range(net_numpool)]) ++ weights[~mask] = 0 ++ weights = weights / weights.sum() ++ self.ds_loss_weights = weights ++ ++ seeds_train = np.random.random_integers(0, 99999, self.data_aug_params.get('num_threads')) ++ seeds_val = np.random.random_integers(0, 99999, max(self.data_aug_params.get('num_threads') // 2, 1)) ++ print("seeds train", seeds_train) ++ print("seeds_val", seeds_val) ++ self.tr_gen, self.val_gen = get_moreDA_augmentation(self.dl_tr, self.dl_val, ++ self.data_aug_params[ ++ 'patch_size_for_spatialtransform'], ++ self.data_aug_params, ++ deep_supervision_scales=self.deep_supervision_scales, ++ seeds_train=seeds_train, ++ seeds_val=seeds_val, ++ pin_memory=self.pin_memory) ++ self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())), ++ also_print_to_console=False) ++ self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())), ++ also_print_to_console=False) ++ else: ++ pass ++ ++ self.initialize_network() ++ self.initialize_optimizer_and_scheduler() ++ self.network = DDP(self.network, device_ids=[self.local_rank]) ++ ++ else: ++ self.print_to_log_file('self.was_initialized is True, not running self.initialize again') ++ self.was_initialized = True ++ ++ def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False): ++ data_dict = next(data_generator) ++ data = data_dict['data'] ++ target = data_dict['target'] ++ ++ data = maybe_to_torch(data) ++ target = maybe_to_torch(target) ++ ++ if torch.cuda.is_available(): ++ data = to_cuda(data, gpu_id=None) ++ target = to_cuda(target, gpu_id=None) ++ ++ self.optimizer.zero_grad() ++ # print('self.fp16=', self.fp16, end=' ') ++ if self.fp16: ++ with autocast(): ++ # print('if', data.shape, len(target), target[0].shape, end=' ') ++ output = self.network(data) ++ # print(len(output), output[0].shape) ++ del data ++ # print(len(output), output[0].shape, target[0].shape) ++ l = self.compute_loss(output, target) ++ ++ if do_backprop: ++ self.amp_grad_scaler.scale(l).backward() ++ self.amp_grad_scaler.unscale_(self.optimizer) ++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) ++ self.amp_grad_scaler.step(self.optimizer) ++ self.amp_grad_scaler.update() ++ else: ++ # print('else', data.shape, len(target), target[0].shape, end=' ') ++ output = self.network(data) ++ # print(len(output), output[0].shape) ++ del data ++ l = self.compute_loss(output, target) ++ ++ if do_backprop: ++ l.backward() ++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) ++ self.optimizer.step() ++ ++ if run_online_evaluation: ++ self.run_online_evaluation(output, target) ++ ++ del target ++ ++ return l.detach().cpu().numpy() ++ ++ def compute_loss(self, output, target): ++ total_loss = None ++ length = len(output) ++ # length = 1 ++ for i in range(length): ++ # Starting here it gets spicy! ++ axes = tuple(range(2, len(output[i].size()))) ++ ++ # network does not do softmax. 
We need to do softmax for dice ++ output_softmax = softmax_helper(output[i]) ++ ++ # get the tp, fp and fn terms we need ++ tp, fp, fn, _ = get_tp_fp_fn_tn(output_softmax, target[0], axes, mask=None) ++ # for dice, compute nominator and denominator so that we have to accumulate only 2 instead of 3 variables ++ # do_bg=False in nnUNetTrainer -> [:, 1:] ++ nominator = 2 * tp[:, 1:] ++ denominator = 2 * tp[:, 1:] + fp[:, 1:] + fn[:, 1:] ++ ++ if self.batch_dice: ++ # for DDP we need to gather all nominator and denominator terms from all GPUS to do proper batch dice ++ nominator = awesome_allgather_function.apply(nominator) ++ denominator = awesome_allgather_function.apply(denominator) ++ nominator = nominator.sum(0) ++ denominator = denominator.sum(0) ++ else: ++ pass ++ ++ ce_loss = self.ce_loss(output[i], target[0][:, 0].long()) ++ ++ # we smooth by 1e-5 to penalize false positives if tp is 0 ++ dice_loss = (- (nominator + 1e-5) / (denominator + 1e-5)).mean() ++ if total_loss is None: ++ total_loss = self.ds_loss_weights[i] * (ce_loss + dice_loss) ++ else: ++ total_loss += self.ds_loss_weights[i] * (ce_loss + dice_loss) ++ return total_loss ++ ++ def run_online_evaluation(self, output, target): ++ with torch.no_grad(): ++ num_classes = output[0].shape[1] ++ output_seg = output[0].argmax(1) ++ target = target[0][:, 0] ++ axes = tuple(range(1, len(target.shape))) ++ tp_hard = torch.zeros((target.shape[0], num_classes - 1)).to(output_seg.device.index) ++ fp_hard = torch.zeros((target.shape[0], num_classes - 1)).to(output_seg.device.index) ++ fn_hard = torch.zeros((target.shape[0], num_classes - 1)).to(output_seg.device.index) ++ for c in range(1, num_classes): ++ tp_hard[:, c - 1] = sum_tensor((output_seg == c).float() * (target == c).float(), axes=axes) ++ fp_hard[:, c - 1] = sum_tensor((output_seg == c).float() * (target != c).float(), axes=axes) ++ fn_hard[:, c - 1] = sum_tensor((output_seg != c).float() * (target == c).float(), axes=axes) ++ ++ # tp_hard, fp_hard, fn_hard = get_tp_fp_fn((output_softmax > (1 / num_classes)).float(), target, ++ # axes, None) ++ # print_if_rank0("before allgather", tp_hard.shape) ++ tp_hard = tp_hard.sum(0, keepdim=False)[None] ++ fp_hard = fp_hard.sum(0, keepdim=False)[None] ++ fn_hard = fn_hard.sum(0, keepdim=False)[None] ++ ++ tp_hard = awesome_allgather_function.apply(tp_hard) ++ fp_hard = awesome_allgather_function.apply(fp_hard) ++ fn_hard = awesome_allgather_function.apply(fn_hard) ++ ++ tp_hard = tp_hard.detach().cpu().numpy().sum(0) ++ fp_hard = fp_hard.detach().cpu().numpy().sum(0) ++ fn_hard = fn_hard.detach().cpu().numpy().sum(0) ++ self.online_eval_foreground_dc.append(list((2 * tp_hard) / (2 * tp_hard + fp_hard + fn_hard + 1e-8))) ++ self.online_eval_tp.append(list(tp_hard)) ++ self.online_eval_fp.append(list(fp_hard)) ++ self.online_eval_fn.append(list(fn_hard)) ++ ++ def run_training(self): ++ """ ++ if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first ++ continued epoch with self.initial_lr ++ ++ we also need to make sure deep supervision in the network is enabled for training, thus the wrapper ++ :return: ++ """ ++ self.maybe_update_lr(self.epoch) # if we dont overwrite epoch then self.epoch+1 is used which is not what we ++ # want at the start of the training ++ if isinstance(self.network, DDP): ++ net = self.network.module ++ else: ++ net = self.network ++ ds = net.do_ds ++ net.do_ds = True ++ ret = nnUNetTrainer.run_training(self) ++ net.do_ds = ds ++ return ret ++ ++ def validate(self, 
do_mirroring: bool = True, use_sliding_window: bool = True,
++ step_size: float = 0.5, save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
++ validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
++ segmentation_export_kwargs: dict = None):
++ if self.local_rank == 0:
++ if isinstance(self.network, DDP):
++ net = self.network.module
++ else:
++ net = self.network
++ ds = net.do_ds
++ net.do_ds = False
++
++ ret = nnUNetTrainer.validate(self, do_mirroring, use_sliding_window, step_size, save_softmax,
++ use_gaussian, overwrite, validation_folder_name, debug, all_in_gpu,
++ segmentation_export_kwargs)
++ net.do_ds = ds
++ return ret
++
++ def predict_preprocessed_data_return_seg_and_softmax(self, data: np.ndarray, do_mirroring: bool = True,
++ mirror_axes: Tuple[int] = None,
++ use_sliding_window: bool = True, step_size: float = 0.5,
++ use_gaussian: bool = True, pad_border_mode: str = 'constant',
++ pad_kwargs: dict = None, all_in_gpu: bool = True,
++ verbose: bool = True, mixed_precision=True, img_name=None,
++ pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]:
++ if pad_border_mode == 'constant' and pad_kwargs is None:
++ pad_kwargs = {'constant_values': 0}
++
++ if do_mirroring and mirror_axes is None:
++ mirror_axes = self.data_aug_params['mirror_axes']
++
++ if do_mirroring:
++ assert self.data_aug_params["do_mirror"], "Cannot do mirroring as test time augmentation when training " \
++ "was done without mirroring"
++
++ valid = list((SegmentationNetwork, nn.DataParallel, DDP))
++ assert isinstance(self.network, tuple(valid))
++ if isinstance(self.network, DDP):
++ net = self.network.module
++ else:
++ net = self.network
++ ds = net.do_ds
++ net.do_ds = False
++ ret = net.predict_3D(data, do_mirroring, mirror_axes, use_sliding_window, step_size, self.patch_size,
++ self.regions_class_order, use_gaussian, pad_border_mode, pad_kwargs,
++ all_in_gpu, verbose, mixed_precision=mixed_precision)
++ net.do_ds = ds
++ return ret
++
++ def load_checkpoint_ram(self, checkpoint, train=True):
++ """
++ used if the checkpoint is already in RAM
++ :param checkpoint:
++ :param train:
++ :return:
++ """
++ if not self.was_initialized:
++ self.initialize(train)
++
++ new_state_dict = OrderedDict()
++ curr_state_dict_keys = list(self.network.state_dict().keys())
++ # if state dict comes from nn.DataParallel but we use non-parallel model here then the state dict keys do not
++ # match. 
Use heuristic to make it match ++ for k, value in checkpoint['state_dict'].items(): ++ key = k ++ if key not in curr_state_dict_keys: ++ print("duh") ++ key = key[7:] ++ new_state_dict[key] = value ++ ++ if self.fp16: ++ self._maybe_init_amp() ++ if 'amp_grad_scaler' in checkpoint.keys(): ++ self.amp_grad_scaler.load_state_dict(checkpoint['amp_grad_scaler']) ++ ++ self.network.load_state_dict(new_state_dict) ++ self.epoch = checkpoint['epoch'] ++ if train: ++ optimizer_state_dict = checkpoint['optimizer_state_dict'] ++ if optimizer_state_dict is not None: ++ self.optimizer.load_state_dict(optimizer_state_dict) ++ ++ if self.lr_scheduler is not None and hasattr(self.lr_scheduler, 'load_state_dict') and checkpoint[ ++ 'lr_scheduler_state_dict'] is not None: ++ self.lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict']) ++ ++ if issubclass(self.lr_scheduler.__class__, _LRScheduler): ++ self.lr_scheduler.step(self.epoch) ++ ++ self.all_tr_losses, self.all_val_losses, self.all_val_losses_tr_mode, self.all_val_eval_metrics = checkpoint[ ++ 'plot_stuff'] ++ ++ # after the training is done, the epoch is incremented one more time in my old code. This results in ++ # self.epoch = 1001 for old trained models when the epoch is actually 1000. This causes issues because ++ # len(self.all_tr_losses) = 1000 and the plot function will fail. We can easily detect and correct that here ++ if self.epoch != len(self.all_tr_losses): ++ self.print_to_log_file("WARNING in loading checkpoint: self.epoch != len(self.all_tr_losses). This is " ++ "due to an old bug and should only appear when you are loading old models. New " ++ "models should have this fixed! self.epoch is now set to len(self.all_tr_losses)") ++ self.epoch = len(self.all_tr_losses) ++ self.all_tr_losses = self.all_tr_losses[:self.epoch] ++ self.all_val_losses = self.all_val_losses[:self.epoch] ++ self.all_val_losses_tr_mode = self.all_val_losses_tr_mode[:self.epoch] ++ self.all_val_eval_metrics = self.all_val_eval_metrics[:self.epoch] +diff --git a/pytorch/nnunet/training/network_training/nnUNetTrainer.py b/pytorch/nnunet/training/network_training/nnUNetTrainer.py +index 2dbf815..a20553f 100644 +--- a/pytorch/nnunet/training/network_training/nnUNetTrainer.py ++++ b/pytorch/nnunet/training/network_training/nnUNetTrainer.py +@@ -40,6 +40,7 @@ from nnunet.utilities.nd_softmax import softmax_helper + from nnunet.utilities.tensor_utilities import sum_tensor + from torch import nn + from torch.optim import lr_scheduler ++from nnunet.inference.infer_path import INFERENCE_OUTPUT_FOLDER + + + matplotlib.use("agg") +@@ -482,7 +483,8 @@ class nnUNetTrainer(NetworkTrainer): + use_sliding_window: bool = True, step_size: float = 0.5, + use_gaussian: bool = True, pad_border_mode: str = 'constant', + pad_kwargs: dict = None, all_in_gpu: bool = True, +- verbose: bool = True, mixed_precision: bool = True) -> Tuple[np.ndarray, np.ndarray]: ++ verbose: bool = True, mixed_precision: bool = True, ++ img_name=None, pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]: + """ + :param data: + :param do_mirroring: +@@ -513,7 +515,7 @@ class nnUNetTrainer(NetworkTrainer): + self.network.eval() + ret = self.network.predict_3D(data, do_mirroring, mirror_axes, use_sliding_window, step_size, self.patch_size, + self.regions_class_order, use_gaussian, pad_border_mode, pad_kwargs, +- all_in_gpu, verbose, mixed_precision=mixed_precision) ++ all_in_gpu, verbose, mixed_precision=mixed_precision, img_name=img_name, pre_mode=pre_mode, fp=fp) + 
self.network.train(current_mode) + return ret + +@@ -533,8 +535,8 @@ class nnUNetTrainer(NetworkTrainer): + self.load_dataset() + self.do_split() + +- if segmentation_export_kwargs is None: +- if 'segmentation_export_params' in self.plans.keys(): ++ if segmentation_export_kwargs is None: # True ++ if 'segmentation_export_params' in self.plans.keys(): # False + force_separate_z = self.plans['segmentation_export_params']['force_separate_z'] + interpolation_order = self.plans['segmentation_export_params']['interpolation_order'] + interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z'] +@@ -576,21 +578,21 @@ class nnUNetTrainer(NetworkTrainer): + export_pool = Pool(default_num_threads) + results = [] + +- for k in self.dataset_val.keys(): ++ for k in self.dataset_val.keys(): # k = Liver_101 + properties = load_pickle(self.dataset[k]['properties_file']) +- fname = properties['list_of_data_files'][0].split("/")[-1][:-12] ++ fname = properties['list_of_data_files'][0].split("/")[-1][:-12] # Liver_101 + if overwrite or (not isfile(join(output_folder, fname + ".nii.gz"))) or \ + (save_softmax and not isfile(join(output_folder, fname + ".npz"))): +- data = np.load(self.dataset[k]['data_file'])['data'] ++ data = np.load(self.dataset[k]['data_file'])['data'] # 2 478 470 470 + + print(k, data.shape) + data[-1][data[-1] == -1] = 0 +- ++ simple_name = INFERENCE_OUTPUT_FOLDER + str(k) + '.nii.gz' + softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(data[:-1], do_mirroring, + mirror_axes, use_sliding_window, + step_size, use_gaussian, + all_in_gpu=all_in_gpu, +- mixed_precision=self.fp16)[1] ++ mixed_precision=self.fp16, img_name=simple_name)[1] + + softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in self.transpose_backward]) + +@@ -620,8 +622,8 @@ class nnUNetTrainer(NetworkTrainer): + ) + ) + +- pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"), +- join(self.gt_niftis_folder, fname + ".nii.gz")]) ++ pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"), # '/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/fold_0/validation_raw' ++ join(self.gt_niftis_folder, fname + ".nii.gz")]) # '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/gt_segmentations' + + _ = [i.get() for i in results] + self.print_to_log_file("finished prediction") +diff --git a/pytorch/nnunet/training/network_training/nnUNetTrainerV2_DDP.py b/pytorch/nnunet/training/network_training/nnUNetTrainerV2_DDP.py +index 812183a..9f56d62 100644 +--- a/pytorch/nnunet/training/network_training/nnUNetTrainerV2_DDP.py ++++ b/pytorch/nnunet/training/network_training/nnUNetTrainerV2_DDP.py +@@ -226,10 +226,12 @@ class nnUNetTrainerV2_DDP(nnUNetTrainerV2): + target = to_cuda(target, gpu_id=None) + + self.optimizer.zero_grad() +- ++ # print('self.fp16=', self.fp16, end=' ') + if self.fp16: + with autocast(): ++ # print('if', data.shape, len(target), target[0].shape, end=' ') + output = self.network(data) ++ # print(len(output), output[0].shape) + del data + l = self.compute_loss(output, target) + +@@ -240,7 +242,9 @@ class nnUNetTrainerV2_DDP(nnUNetTrainerV2): + self.amp_grad_scaler.step(self.optimizer) + self.amp_grad_scaler.update() + else: ++ # print('else', data.shape, len(target), target[0].shape, end=' ') + output = self.network(data) ++ # print(len(output), output[0].shape) + del data + l = self.compute_loss(output, target) + +diff --git a/pytorch/run.sh b/pytorch/run.sh +new 
file mode 100644 +index 0000000..0abb8d5 +--- /dev/null ++++ b/pytorch/run.sh +@@ -0,0 +1,5 @@ ++python nnunet/run/run_training.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 ++ ++ ++python -m torch.distributed.launch --nproc_per_node 2 nnunet/run/run_training_DDP.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 ++ +diff --git a/pytorch/setup.py b/pytorch/setup.py +index 590a453..554f4e2 100644 +--- a/pytorch/setup.py ++++ b/pytorch/setup.py +@@ -9,13 +9,13 @@ setup(name='nnunet', + author_email='f.isensee@dkfz-heidelberg.de', + license='Apache License Version 2.0, January 2004', + install_requires=[ +- "torch>=1.6.0a", ++ "torch>=1.6.0", + "tqdm", + "dicom2nifti", + "scikit-image>=0.14", + "medpy", + "scipy", +- "batchgenerators>=0.21", ++ "batchgenerators==0.21", + "numpy", + "sklearn", + "SimpleITK", diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/onnx_infer.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/onnx_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..9c47c62f4b186a1a90a2365ebac23a3aafdcb675 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/onnx_infer.py @@ -0,0 +1,91 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
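+# Rough ONNX Runtime latency benchmark: runs `infer_times` forward passes on
+# random input tensors of the shape given on the command line and reports the
+# mean latency after discarding the first `ignore_times` warm-up runs, e.g.
+# (the shape below is only an illustration):
+#     python3 onnx_infer.py model.onnx 1,1,128,128,128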
+ +import sys +import time +import numpy as np +import onnxruntime +from tqdm import tqdm + + +all_time = 0 +infer_times = 200 +ignore_times = 10 +curr_time = 0 +assert infer_times > ignore_times + +def display_time(func): + def wrapper(*args): + t1 = time.time() + req = func(*args) + t2 = time.time() + spent_time = t2 - t1 + print("Single time: {:.4}s".format(spent_time)) + global all_time, curr_time + curr_time += 1 + if curr_time > ignore_times: + all_time += spent_time + return req + return wrapper + + +class ONNXModel(): + def __init__(self, onnx_path): + # providers: TensorrtExecutionProvider/CUDAExecutionProvider/CPUExecutionProvider + self.onnx_session = onnxruntime.InferenceSession(onnx_path, providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider']) + self.input_name = self.get_input_name(self.onnx_session) + self.output_name = self.get_output_name(self.onnx_session) + self.input_feed = None + + def get_output_name(self, onnx_session): + output_name = [] + for node in onnx_session.get_outputs(): + output_name.append(node.name) + return output_name + + def get_input_name(self, onnx_session): + input_name = [] + for node in onnx_session.get_inputs(): + input_name.append(node.name) + return input_name + + def get_input_feed(self, image_numpy): + input_feed = {} + for name in self.input_name: + input_feed[name] = image_numpy + self.input_feed = input_feed + + @display_time + def forward(self): + self.onnx_session.run(self.output_name, input_feed=self.input_feed) + + +def create_random_input(input_shape, dtype=np.float32): + input_data = np.random.random(input_shape).astype(dtype) + return input_data + + +if __name__ == '__main__': + model_file = sys.argv[1] + input_shape = sys.argv[2] + np.random.seed(123) + input_shape = list(map(int, input_shape.split(','))) + net = ONNXModel(model_file) + + for _ in tqdm(range(infer_times)): + input_data = create_random_input(input_shape) + net.get_input_feed(input_data) + net.forward() + + print("Average time spent: {:.4}s".format(all_time / (infer_times - ignore_times))) diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..07bda18e71fe40802c2df7cfa6eb66fbde93a2d6 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/requirements.txt @@ -0,0 +1,51 @@ +auto-tune @ file:///root/selfgz406448609/compiler/lib64/auto_tune-0.1.0-py3-none-any.whl +batchgenerators==0.21 +certifi==2021.10.8 +cycler==0.11.0 +decorator==5.1.1 +dicom2nifti==2.3.2 +fonttools==4.29.1 +future==0.18.2 +hccl @ file:///root/selfgz406448609/compiler/lib64/hccl-0.1.0-py3-none-any.whl +imageio==2.16.1 +joblib==1.1.0 +kiwisolver==1.3.2 +linecache2==1.0.0 +matplotlib==3.5.1 +MedPy==0.4.0 +mkl-fft==1.3.1 +mkl-random @ file:///tmp/build/80754af9/mkl_random_1626186064646/work +mkl-service==2.4.0 +mpmath==1.2.1 +networkx==2.6.3 +nibabel==3.2.2 +-e git+https://github.com/MrGiovanni/UNetPlusPlus.git@e145ba63862982bf1099cf2ec11d5466b434ae0b#egg=nnunet&subdirectory=pytorch +numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1634095647912/work +olefile @ file:///Users/ktietz/demo/mc3/conda-bld/olefile_1629805411829/work +op-gen @ file:///usr/local/Ascend/ascend-toolkit/5.0.4/x86_64-linux/toolkit/tools/op_gen-0.1-py3-none-any.whl +op-test-frame @ file:///usr/local/Ascend/ascend-toolkit/5.0.4/x86_64-linux/toolkit/tools/op_test_frame-0.1-py3-none-any.whl +packaging==21.3 +pandas==1.4.1 +Pillow==8.4.0 
+pydicom==2.2.2 +pyparsing==3.0.7 +python-dateutil==2.8.2 +pytz==2021.3 +PyWavelets==1.2.0 +schedule-search @ file:///root/selfgz406448609/compiler/lib64/schedule_search-0.1.0-py3-none-any.whl +scikit-image==0.19.2 +scikit-learn==1.0.2 +scipy==1.8.0 +SimpleITK==2.1.1 +six @ file:///tmp/build/80754af9/six_1644875935023/work +sklearn==0.0 +sympy==1.9 +te @ file:///root/selfgz406448609/compiler/lib64/te-0.4.0-py3-none-any.whl +threadpoolctl==3.1.0 +tifffile==2022.2.9 +topi @ file:///root/selfgz406448609/compiler/lib64/topi-0.4.0-py3-none-any.whl +torch==1.6.0 +torchvision==0.7.0 +tqdm==4.63.0 +traceback2==1.4.0 +unittest2==1.1.0 diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/set_env.sh b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/set_env.sh new file mode 100644 index 0000000000000000000000000000000000000000..b79cc18a90e8a6fd6f228ccbf1798ba96b6cc614 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/set_env.sh @@ -0,0 +1,9 @@ +export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/ascend-toolkit/latest/atc/lib64:$LD_LIBRARY_PATH +export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/toolkit/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/atc/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/pyACL/python/site-packages/acl:$PYTHONPATH +export PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/bin:/usr/local/Ascend/ascend-toolkit/latest/atc/bin:/usr/local/Ascend/ascend-toolkit/latest/atc/ccec_compiler/bin:$PATH +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp +export TOOLCHAIN_HOME=/usr/local/Ascend/ascend-toolkit/latest/toolkit +export DDK_PATH=/home/usr/local/Ascend/ascend-toolkit/latest +export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub