diff --git a/mednist_tutorial.ipynb b/2d_classification/mednist_tutorial.ipynb similarity index 99% rename from mednist_tutorial.ipynb rename to 2d_classification/mednist_tutorial.ipynb index 5edba6ac70..d26558f41c 100644 --- a/mednist_tutorial.ipynb +++ b/2d_classification/mednist_tutorial.ipynb @@ -15,7 +15,7 @@ "* Train the model with a PyTorch program\n", "* Evaluate on test dataset\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/mednist_tutorial.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/2d_classification/mednist_tutorial.ipynb)" ] }, { @@ -683,7 +683,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/2d_segmentation/torch/unet_evaluation_array.py b/2d_segmentation/torch/unet_evaluation_array.py new file mode 100644 index 0000000000..cbd8c0da47 --- /dev/null +++ b/2d_segmentation/torch/unet_evaluation_array.py @@ -0,0 +1,84 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import torch +from PIL import Image +from torch.utils.data import DataLoader + +from monai import config +from monai.data import ArrayDataset, PNGSaver, create_test_image_2d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.networks.nets import UNet +from monai.transforms import AddChannel, Compose, LoadImage, ScaleIntensity, ToTensor + + +def main(tempdir): + config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_2d(128, 128, num_seg_classes=1) + Image.fromarray(im.astype("uint8")).save(os.path.join(tempdir, f"img{i:d}.png")) + Image.fromarray(seg.astype("uint8")).save(os.path.join(tempdir, f"seg{i:d}.png")) + + images = sorted(glob(os.path.join(tempdir, "img*.png"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.png"))) + + # define transforms for image and segmentation + imtrans = Compose([LoadImage(image_only=True), ScaleIntensity(), AddChannel(), ToTensor()]) + segtrans = Compose([LoadImage(image_only=True), AddChannel(), ToTensor()]) + val_ds = ArrayDataset(images, imtrans, segs, segtrans) + # sliding window inference for one image at every iteration + val_loader = DataLoader(val_ds, batch_size=1, num_workers=1, pin_memory=torch.cuda.is_available()) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = UNet( + dimensions=2, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + 
num_res_units=2, + ).to(device) + + model.load_state_dict(torch.load("best_metric_model_segmentation2d_array.pth")) + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + saver = PNGSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + # define sliding window size and batch size for windows inference + roi_size = (96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + val_outputs = val_outputs.sigmoid() >= 0.5 + saver.save_batch(val_outputs) + metric = metric_sum / metric_count + print("evaluation metric:", metric) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/2d_segmentation/torch/unet_evaluation_dict.py b/2d_segmentation/torch/unet_evaluation_dict.py new file mode 100644 index 0000000000..f20152921c --- /dev/null +++ b/2d_segmentation/torch/unet_evaluation_dict.py @@ -0,0 +1,92 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import torch +from PIL import Image +from torch.utils.data import DataLoader + +import monai +from monai.data import PNGSaver, create_test_image_2d, list_data_collate +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.networks.nets import UNet +from monai.transforms import AddChanneld, Compose, LoadImaged, ScaleIntensityd, ToTensord + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_2d(128, 128, num_seg_classes=1) + Image.fromarray(im.astype("uint8")).save(os.path.join(tempdir, f"img{i:d}.png")) + Image.fromarray(seg.astype("uint8")).save(os.path.join(tempdir, f"seg{i:d}.png")) + + images = sorted(glob(os.path.join(tempdir, "img*.png"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.png"))) + val_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadImaged(keys=["img", "seg"]), + AddChanneld(keys=["img", "seg"]), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + # sliding window inference need to input 1 image in every iteration + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = UNet( + dimensions=2, + in_channels=1, + out_channels=1, + 
channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + + model.load_state_dict(torch.load("best_metric_model_segmentation2d_dict.pth")) + + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + saver = PNGSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device) + # define sliding window size and batch size for windows inference + roi_size = (96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + val_outputs = val_outputs.sigmoid() >= 0.5 + saver.save_batch(val_outputs) + metric = metric_sum / metric_count + print("evaluation metric:", metric) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/2d_segmentation/torch/unet_training_array.py b/2d_segmentation/torch/unet_training_array.py new file mode 100644 index 0000000000..9249fd1aaf --- /dev/null +++ b/2d_segmentation/torch/unet_training_array.py @@ -0,0 +1,166 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
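Both evaluation scripts above run the trained 2D UNet through monai.inferers.sliding_window_inference with roi_size=(96, 96) and sw_batch_size=4, so each 128x128 test image is tiled into overlapping 96x96 windows, four windows are inferred per forward pass, and the window outputs are blended back to the full image size. A minimal sketch of that call, using a random tensor and an identity function as stand-ins for the real image and the trained UNet (both stand-ins are assumptions for illustration, not part of the scripts above):

import torch
from monai.inferers import sliding_window_inference

dummy_image = torch.rand(1, 1, 128, 128)      # stand-in for one loaded validation image
identity_net = lambda x: x                    # stand-in for the trained UNet

# 96x96 windows are inferred 4 at a time and stitched back to the 128x128 input size
output = sliding_window_inference(dummy_image, roi_size=(96, 96), sw_batch_size=4, predictor=identity_net)
print(output.shape)                           # torch.Size([1, 1, 128, 128])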
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import torch +from PIL import Image +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.data import ArrayDataset, create_test_image_2d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import AddChannel, Compose, LoadImage, RandRotate90, RandSpatialCrop, ScaleIntensity, ToTensor +from monai.visualize import plot_2d_or_3d_image + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_2d(128, 128, num_seg_classes=1) + Image.fromarray(im.astype("uint8")).save(os.path.join(tempdir, f"img{i:d}.png")) + Image.fromarray(seg.astype("uint8")).save(os.path.join(tempdir, f"seg{i:d}.png")) + + images = sorted(glob(os.path.join(tempdir, "img*.png"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.png"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images[:20], segs[:20])] + val_files = [{"img": img, "seg": seg} for img, seg in zip(images[-20:], segs[-20:])] + + # define transforms for image and segmentation + train_imtrans = Compose( + [ + LoadImage(image_only=True), + ScaleIntensity(), + AddChannel(), + RandSpatialCrop((96, 96), random_size=False), + RandRotate90(prob=0.5, spatial_axes=(0, 1)), + ToTensor(), + ] + ) + train_segtrans = Compose( + [ + LoadImage(image_only=True), + AddChannel(), + RandSpatialCrop((96, 96), random_size=False), + RandRotate90(prob=0.5, spatial_axes=(0, 1)), + ToTensor(), + ] + ) + val_imtrans = Compose([LoadImage(image_only=True), ScaleIntensity(), AddChannel(), ToTensor()]) + val_segtrans = Compose([LoadImage(image_only=True), AddChannel(), ToTensor()]) + + # define array dataset, data loader + check_ds = ArrayDataset(images, train_imtrans, segs, train_segtrans) + check_loader = DataLoader(check_ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available()) + im, seg = monai.utils.misc.first(check_loader) + print(im.shape, seg.shape) + + # create a training data loader + train_ds = ArrayDataset(images[:20], train_imtrans, segs[:20], train_segtrans) + train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=8, pin_memory=torch.cuda.is_available()) + # create a validation data loader + val_ds = ArrayDataset(images[-20:], val_imtrans, segs[-20:], val_segtrans) + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, pin_memory=torch.cuda.is_available()) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.UNet( + dimensions=2, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True) + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + + # start a typical PyTorch training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + epoch_loss_values = list() + metric_values = list() + writer = SummaryWriter() + for epoch in range(10): + print("-" * 10) + print(f"epoch {epoch + 1}/{10}") + model.train() + epoch_loss = 0 + step = 0 + for 
batch_data in train_loader: + step += 1 + inputs, labels = batch_data[0].to(device), batch_data[1].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + val_images = None + val_labels = None + val_outputs = None + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + roi_size = (96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + metric = metric_sum / metric_count + metric_values.append(metric) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_segmentation2d_array.pth") + print("saved new best metric model") + print( + "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format( + epoch + 1, metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_mean_dice", metric, epoch + 1) + # plot the last model output as GIF image in TensorBoard with the corresponding image and label + plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image") + plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label") + plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output") + + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/2d_segmentation/torch/unet_training_dict.py b/2d_segmentation/torch/unet_training_dict.py new file mode 100644 index 0000000000..3945ba7809 --- /dev/null +++ b/2d_segmentation/torch/unet_training_dict.py @@ -0,0 +1,182 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
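In the training loop above, epoch_len * epoch + step simply converts (epoch, step-within-epoch) into a single monotonically increasing global step for the TensorBoard writer. With the numbers used by this script (20 training pairs, batch size 4) the arithmetic works out as follows; the snippet is only an illustration of the indexing, not output from the script:

num_train_images = 20
batch_size = 4
epoch_len = num_train_images // batch_size    # 5 iterations per epoch

for epoch in range(2):
    for step in range(1, epoch_len + 1):
        global_step = epoch_len * epoch + step
        print(epoch + 1, step, global_step)   # global step runs 1..5 in epoch 1, 6..10 in epoch 2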
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import torch +from PIL import Image +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.data import create_test_image_2d, list_data_collate +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import ( + AddChanneld, + Compose, + LoadImaged, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) +from monai.visualize import plot_2d_or_3d_image + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_2d(128, 128, num_seg_classes=1) + Image.fromarray(im.astype("uint8")).save(os.path.join(tempdir, f"img{i:d}.png")) + Image.fromarray(seg.astype("uint8")).save(os.path.join(tempdir, f"seg{i:d}.png")) + + images = sorted(glob(os.path.join(tempdir, "img*.png"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.png"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images[:20], segs[:20])] + val_files = [{"img": img, "seg": seg} for img, seg in zip(images[-20:], segs[-20:])] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadImaged(keys=["img", "seg"]), + AddChanneld(keys=["img", "seg"]), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 1]), + ToTensord(keys=["img", "seg"]), + ] + ) + val_transforms = Compose( + [ + LoadImaged(keys=["img", "seg"]), + AddChanneld(keys=["img", "seg"]), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + + # define dataset, data loader + check_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + check_loader = DataLoader(check_ds, batch_size=2, num_workers=4, collate_fn=list_data_collate) + check_data = monai.utils.misc.first(check_loader) + print(check_data["img"].shape, check_data["seg"].shape) + + # create a training data loader + train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=True, + num_workers=4, + collate_fn=list_data_collate, + pin_memory=torch.cuda.is_available(), + ) + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.UNet( + dimensions=2, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True) + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + + # start a typical PyTorch 
training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + epoch_loss_values = list() + metric_values = list() + writer = SummaryWriter() + for epoch in range(10): + print("-" * 10) + print(f"epoch {epoch + 1}/{10}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + val_images = None + val_labels = None + val_outputs = None + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device) + roi_size = (96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + metric = metric_sum / metric_count + metric_values.append(metric) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_segmentation2d_dict.pth") + print("saved new best metric model") + print( + "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format( + epoch + 1, metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_mean_dice", metric, epoch + 1) + # plot the last model output as GIF image in TensorBoard with the corresponding image and label + plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image") + plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label") + plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output") + + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_classification/ignite/densenet_evaluation_array.py b/3d_classification/ignite/densenet_evaluation_array.py new file mode 100644 index 0000000000..9692fecfda --- /dev/null +++ b/3d_classification/ignite/densenet_evaluation_array.py @@ -0,0 +1,94 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
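The dict-based training script above relies on RandCropByPosNegLabeld producing num_samples=4 crops per loaded image and on list_data_collate flattening those per-sample lists, which is why a DataLoader batch_size of 2 yields 2 x 4 = 8 patches of 96x96 per iteration. A self-contained sketch of that behaviour, with synthetic arrays standing in for the PNG pairs (the random data here is an assumption purely for illustration):

import numpy as np
from torch.utils.data import DataLoader
import monai
from monai.data import list_data_collate
from monai.transforms import AddChanneld, Compose, RandCropByPosNegLabeld, ToTensord

# two synthetic image/label pairs standing in for the loaded PNG files
data = [
    {"img": np.random.rand(128, 128).astype(np.float32),
     "seg": (np.random.rand(128, 128) > 0.5).astype(np.float32)}
    for _ in range(2)
]
transforms = Compose([
    AddChanneld(keys=["img", "seg"]),
    # each input sample is expanded into 4 random 96x96 crops
    RandCropByPosNegLabeld(keys=["img", "seg"], label_key="seg",
                           spatial_size=[96, 96], pos=1, neg=1, num_samples=4),
    ToTensord(keys=["img", "seg"]),
])
ds = monai.data.Dataset(data=data, transform=transforms)
# list_data_collate flattens the per-sample crop lists into one batch
loader = DataLoader(ds, batch_size=2, collate_fn=list_data_collate)
batch = monai.utils.misc.first(loader)
print(batch["img"].shape)    # torch.Size([8, 1, 96, 96]): 2 loaded images x 4 crops each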
+ +import logging +import os +import sys + +import numpy as np +import torch +from ignite.engine import _prepare_batch, create_supervised_evaluator +from ignite.metrics import Accuracy +from torch.utils.data import DataLoader + +import monai +from monai.data import NiftiDataset +from monai.handlers import CheckpointLoader, ClassificationSaver, StatsHandler +from monai.transforms import AddChannel, Compose, Resize, ScaleIntensity, ToTensor + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + + # define transforms for image + val_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), ToTensor()]) + # define nifti dataset + val_ds = NiftiDataset(image_files=images, labels=labels, transform=val_transforms, image_only=False) + # create DenseNet121 + net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + metric_name = "Accuracy" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: Accuracy()} + + def prepare_batch(batch, device=None, non_blocking=False): + return _prepare_batch((batch[0], batch[1]), device, non_blocking) + + # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True, prepare_batch=prepare_batch) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + ) + val_stats_handler.attach(evaluator) + + # for the array data format, assume the 3rd item of batch data is the meta_data + prediction_saver = ClassificationSaver( + output_dir="tempdir", + batch_transform=lambda batch: batch[2], + output_transform=lambda output: output[0].argmax(1), + ) + prediction_saver.attach(evaluator) + + # the model was trained by "densenet_training_array" example + CheckpointLoader(load_path="./runs_array/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator) + + # create a validation data loader + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, 
pin_memory=torch.cuda.is_available()) + + state = evaluator.run(val_loader) + print(state) + + +if __name__ == "__main__": + main() diff --git a/3d_classification/ignite/densenet_evaluation_dict.py b/3d_classification/ignite/densenet_evaluation_dict.py new file mode 100644 index 0000000000..69b917be2d --- /dev/null +++ b/3d_classification/ignite/densenet_evaluation_dict.py @@ -0,0 +1,102 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys + +import numpy as np +import torch +from ignite.engine import _prepare_batch, create_supervised_evaluator +from ignite.metrics import Accuracy +from torch.utils.data import DataLoader + +import monai +from monai.handlers import CheckpointLoader, ClassificationSaver, StatsHandler +from monai.transforms import AddChanneld, Compose, LoadNiftid, Resized, ScaleIntensityd, ToTensord + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + val_files = [{"img": img, "label": label} for img, label in zip(images, labels)] + + # define transforms for image + val_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + ToTensord(keys=["img"]), + ] + ) + + # create DenseNet121 + net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def prepare_batch(batch, device=None, non_blocking=False): + return _prepare_batch((batch["img"], batch["label"]), device, non_blocking) + + metric_name = "Accuracy" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: Accuracy()} + # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) 
at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True, prepare_batch=prepare_batch) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + ) + val_stats_handler.attach(evaluator) + + # for the array data format, assume the 3rd item of batch data is the meta_data + prediction_saver = ClassificationSaver( + output_dir="tempdir", + name="evaluator", + batch_transform=lambda batch: batch["img_meta_dict"], + output_transform=lambda output: output[0].argmax(1), + ) + prediction_saver.attach(evaluator) + + # the model was trained by "densenet_training_dict" example + CheckpointLoader(load_path="./runs_dict/net_checkpoint_20.pth", load_dict={"net": net}).attach(evaluator) + + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + + state = evaluator.run(val_loader) + print(state) + + +if __name__ == "__main__": + main() diff --git a/3d_classification/ignite/densenet_training_array.py b/3d_classification/ignite/densenet_training_array.py new file mode 100644 index 0000000000..a57f28f0b8 --- /dev/null +++ b/3d_classification/ignite/densenet_training_array.py @@ -0,0 +1,144 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
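In both Ignite evaluation scripts above, create_supervised_evaluator stores (y_pred, y) as engine.state.output, so the output_transform handed to ClassificationSaver, lambda output: output[0].argmax(1), reduces the raw class scores to one predicted label per scan before saving. A tiny illustration of that reduction on made-up scores (the tensors below are invented for the example):

import torch

y_pred = torch.tensor([[2.3, -1.1],     # class scores for 2 scans, 2 classes (output[0])
                       [0.2,  0.9]])
y = torch.tensor([0, 1])                # ground-truth labels (output[1])
output = (y_pred, y)

predicted_class = output[0].argmax(1)   # same transform as passed to ClassificationSaver
print(predicted_class)                  # tensor([0, 1])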
+ +import logging +import os +import sys + +import numpy as np +import torch +from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer +from ignite.handlers import EarlyStopping, ModelCheckpoint +from ignite.metrics import Accuracy +from torch.utils.data import DataLoader + +import monai +from monai.data import NiftiDataset +from monai.handlers import StatsHandler, TensorBoardStatsHandler, stopping_fn_from_metric +from monai.transforms import AddChannel, Compose, RandRotate90, Resize, ScaleIntensity, ToTensor + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI314-IOP-0889-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI249-Guys-1072-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI609-HH-2600-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI173-HH-1590-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI020-Guys-0700-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI342-Guys-0909-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI134-Guys-0780-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI577-HH-2661-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI066-Guys-0731-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI130-HH-1528-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + + # define transforms + train_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), RandRotate90(), ToTensor()]) + val_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), ToTensor()]) + + # define nifti dataset, data loader + check_ds = NiftiDataset(image_files=images, labels=labels, transform=train_transforms) + check_loader = DataLoader(check_ds, batch_size=2, num_workers=2, pin_memory=torch.cuda.is_available()) + im, label = monai.utils.misc.first(check_loader) + print(type(im), im.shape, label) + + # create DenseNet121, CrossEntropyLoss and Adam optimizer + net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2) + loss = 
torch.nn.CrossEntropyLoss() + lr = 1e-5 + opt = torch.optim.Adam(net.parameters(), lr) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Ignite trainer expects batch=(img, label) and returns output=loss at every iteration, + # user can add output_transform to return other values, like: y_pred, y, etc. + trainer = create_supervised_trainer(net, opt, loss, device, False) + + # adding checkpoint handler to save models (network params and optimizer stats) during training + checkpoint_handler = ModelCheckpoint("./runs_array/", "net", n_saved=10, require_empty=False) + trainer.add_event_handler( + event_name=Events.EPOCH_COMPLETED, handler=checkpoint_handler, to_save={"net": net, "opt": opt} + ) + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't set metrics for trainer here, so just print loss, user can also customize print functions + # and can use output_transform to convert engine.state.output if it's not loss value + train_stats_handler = StatsHandler(name="trainer") + train_stats_handler.attach(trainer) + + # TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler + train_tensorboard_stats_handler = TensorBoardStatsHandler() + train_tensorboard_stats_handler.attach(trainer) + + # set parameters for validation + validation_every_n_epochs = 1 + + metric_name = "Accuracy" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: Accuracy()} + # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_stats_handler.attach(evaluator) + + # add handler to record metrics to TensorBoard at every epoch + val_tensorboard_stats_handler = TensorBoardStatsHandler( + output_transform=lambda x: None, # no need to plot loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_tensorboard_stats_handler.attach(evaluator) + + # add early stopping handler to evaluator + early_stopper = EarlyStopping(patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer) + evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=early_stopper) + + # create a validation data loader + val_ds = NiftiDataset(image_files=images[-10:], labels=labels[-10:], transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=2, pin_memory=torch.cuda.is_available()) + + @trainer.on(Events.EPOCH_COMPLETED(every=validation_every_n_epochs)) + def run_validation(engine): + evaluator.run(val_loader) + + # create a training data loader + train_ds = NiftiDataset(image_files=images[:10], labels=labels[:10], transform=train_transforms) + train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=2, pin_memory=torch.cuda.is_available()) + + train_epochs = 30 + state = trainer.run(train_loader, train_epochs) + print(state) + + +if __name__ == "__main__": + main() diff --git a/3d_classification/ignite/densenet_training_dict.py 
b/3d_classification/ignite/densenet_training_dict.py new file mode 100644 index 0000000000..e116542386 --- /dev/null +++ b/3d_classification/ignite/densenet_training_dict.py @@ -0,0 +1,166 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys + +import numpy as np +import torch +from ignite.engine import Events, _prepare_batch, create_supervised_evaluator, create_supervised_trainer +from ignite.handlers import EarlyStopping, ModelCheckpoint +from ignite.metrics import Accuracy +from torch.utils.data import DataLoader + +import monai +from monai.handlers import ROCAUC, StatsHandler, TensorBoardStatsHandler, stopping_fn_from_metric +from monai.transforms import AddChanneld, Compose, LoadNiftid, RandRotate90d, Resized, ScaleIntensityd, ToTensord + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI314-IOP-0889-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI249-Guys-1072-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI609-HH-2600-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI173-HH-1590-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI020-Guys-0700-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI342-Guys-0909-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI134-Guys-0780-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI577-HH-2661-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI066-Guys-0731-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI130-HH-1528-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 
0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + train_files = [{"img": img, "label": label} for img, label in zip(images[:10], labels[:10])] + val_files = [{"img": img, "label": label} for img, label in zip(images[-10:], labels[-10:])] + + # define transforms for image + train_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + RandRotate90d(keys=["img"], prob=0.8, spatial_axes=[0, 2]), + ToTensord(keys=["img"]), + ] + ) + val_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + ToTensord(keys=["img"]), + ] + ) + + # define dataset, data loader + check_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + check_loader = DataLoader(check_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + check_data = monai.utils.misc.first(check_loader) + print(check_data["img"].shape, check_data["label"]) + + # create DenseNet121, CrossEntropyLoss and Adam optimizer + net = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2) + loss = torch.nn.CrossEntropyLoss() + lr = 1e-5 + opt = torch.optim.Adam(net.parameters(), lr) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Ignite trainer expects batch=(img, label) and returns output=loss at every iteration, + # user can add output_transform to return other values, like: y_pred, y, etc. + def prepare_batch(batch, device=None, non_blocking=False): + + return _prepare_batch((batch["img"], batch["label"]), device, non_blocking) + + trainer = create_supervised_trainer(net, opt, loss, device, False, prepare_batch=prepare_batch) + + # adding checkpoint handler to save models (network params and optimizer stats) during training + checkpoint_handler = ModelCheckpoint("./runs_dict/", "net", n_saved=10, require_empty=False) + trainer.add_event_handler( + event_name=Events.EPOCH_COMPLETED, handler=checkpoint_handler, to_save={"net": net, "opt": opt} + ) + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't set metrics for trainer here, so just print loss, user can also customize print functions + # and can use output_transform to convert engine.state.output if it's not loss value + train_stats_handler = StatsHandler(name="trainer") + train_stats_handler.attach(trainer) + + # TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler + train_tensorboard_stats_handler = TensorBoardStatsHandler() + train_tensorboard_stats_handler.attach(trainer) + + # set parameters for validation + validation_every_n_epochs = 1 + + metric_name = "Accuracy" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: Accuracy(), "AUC": ROCAUC(to_onehot_y=True, softmax=True)} + # Ignite evaluator expects batch=(img, label) and returns output=(y_pred, y) at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True, prepare_batch=prepare_batch) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + 
val_stats_handler.attach(evaluator) + + # add handler to record metrics to TensorBoard at every epoch + val_tensorboard_stats_handler = TensorBoardStatsHandler( + output_transform=lambda x: None, # no need to plot loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_tensorboard_stats_handler.attach(evaluator) + + # add early stopping handler to evaluator + early_stopper = EarlyStopping(patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer) + evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=early_stopper) + + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + + @trainer.on(Events.EPOCH_COMPLETED(every=validation_every_n_epochs)) + def run_validation(engine): + evaluator.run(val_loader) + + # create a training data loader + train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=4, pin_memory=torch.cuda.is_available()) + + train_epochs = 30 + state = trainer.run(train_loader, train_epochs) + print(state) + + +if __name__ == "__main__": + main() diff --git a/3d_classification/torch/densenet_evaluation_array.py b/3d_classification/torch/densenet_evaluation_array.py new file mode 100644 index 0000000000..43428ba4d5 --- /dev/null +++ b/3d_classification/torch/densenet_evaluation_array.py @@ -0,0 +1,77 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
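The early stopping wiring above hinges on Ignite's EarlyStopping calling a score_function(engine) after each validation run and halting training once the returned score has not improved for patience=4 evaluations. monai.handlers.stopping_fn_from_metric("Accuracy") builds such a function by reading that metric from the evaluator's state; a rough, self-contained sketch of the idea (the SimpleNamespace engine is a stand-in, not part of the script):

from types import SimpleNamespace

# stand-in for the evaluator after a validation pass has populated its metrics
fake_engine = SimpleNamespace(state=SimpleNamespace(metrics={"Accuracy": 0.85}))

def stopping_fn(engine):
    # higher is better: EarlyStopping keeps training while this value keeps improving
    return engine.state.metrics["Accuracy"]

print(stopping_fn(fake_engine))    # 0.85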
+ +import logging +import os +import sys + +import numpy as np +import torch +from torch.utils.data import DataLoader + +import monai +from monai.data import CSVSaver, NiftiDataset +from monai.transforms import AddChannel, Compose, Resize, ScaleIntensity, ToTensor + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + + # Define transforms for image + val_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), ToTensor()]) + + # Define nifti dataset + val_ds = NiftiDataset(image_files=images, labels=labels, transform=val_transforms, image_only=False) + # create a validation data loader + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + + # Create DenseNet121 + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device) + + model.load_state_dict(torch.load("best_metric_model_classification3d_array.pth")) + model.eval() + with torch.no_grad(): + num_correct = 0.0 + metric_count = 0 + saver = CSVSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + val_outputs = model(val_images).argmax(dim=1) + value = torch.eq(val_outputs, val_labels) + metric_count += len(value) + num_correct += value.sum().item() + saver.save_batch(val_outputs, val_data[2]) + metric = num_correct / metric_count + print("evaluation metric:", metric) + saver.finalize() + + +if __name__ == "__main__": + main() diff --git a/3d_classification/torch/densenet_evaluation_dict.py b/3d_classification/torch/densenet_evaluation_dict.py new file mode 100644 index 0000000000..0d6b2420fb --- /dev/null +++ b/3d_classification/torch/densenet_evaluation_dict.py @@ -0,0 +1,85 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys + +import numpy as np +import torch +from torch.utils.data import DataLoader + +import monai +from monai.data import CSVSaver +from monai.transforms import AddChanneld, Compose, LoadNiftid, Resized, ScaleIntensityd, ToTensord + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + val_files = [{"img": img, "label": label} for img, label in zip(images, labels)] + + # Define transforms for image + val_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + ToTensord(keys=["img"]), + ] + ) + + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + + # Create DenseNet121 + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device) + + model.load_state_dict(torch.load("best_metric_model_classification3d_dict.pth")) + model.eval() + with torch.no_grad(): + num_correct = 0.0 + metric_count = 0 + saver = CSVSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["label"].to(device) + val_outputs = model(val_images).argmax(dim=1) + value = torch.eq(val_outputs, val_labels) + metric_count += len(value) + num_correct += value.sum().item() + saver.save_batch(val_outputs, val_data["img_meta_dict"]) + metric = num_correct / metric_count + print("evaluation metric:", metric) + saver.finalize() + + +if __name__ == "__main__": + main() diff --git a/3d_classification/torch/densenet_training_array.py b/3d_classification/torch/densenet_training_array.py new file mode 100644 index 0000000000..a0712a4a57 --- /dev/null +++ b/3d_classification/torch/densenet_training_array.py @@ -0,0 +1,139 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys + +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.data import NiftiDataset +from monai.transforms import AddChannel, Compose, RandRotate90, Resize, ScaleIntensity, ToTensor + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI314-IOP-0889-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI249-Guys-1072-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI609-HH-2600-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI173-HH-1590-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI020-Guys-0700-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI342-Guys-0909-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI134-Guys-0780-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI577-HH-2661-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI066-Guys-0731-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI130-HH-1528-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + + # Define transforms + train_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), RandRotate90(), ToTensor()]) + val_transforms = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), ToTensor()]) + + # Define nifti dataset, data loader + check_ds = NiftiDataset(image_files=images, labels=labels, transform=train_transforms) + check_loader = DataLoader(check_ds, batch_size=2, num_workers=2, pin_memory=torch.cuda.is_available()) + im, label = monai.utils.misc.first(check_loader) + print(type(im), im.shape, 
label) + + # create a training data loader + train_ds = NiftiDataset(image_files=images[:10], labels=labels[:10], transform=train_transforms) + train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=2, pin_memory=torch.cuda.is_available()) + + # create a validation data loader + val_ds = NiftiDataset(image_files=images[-10:], labels=labels[-10:], transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=2, pin_memory=torch.cuda.is_available()) + + # Create DenseNet121, CrossEntropyLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device) + loss_function = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), 1e-5) + + # start a typical PyTorch training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + epoch_loss_values = list() + metric_values = list() + writer = SummaryWriter() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data[0].to(device), batch_data[1].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + num_correct = 0.0 + metric_count = 0 + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + val_outputs = model(val_images) + value = torch.eq(val_outputs.argmax(dim=1), val_labels) + metric_count += len(value) + num_correct += value.sum().item() + metric = num_correct / metric_count + metric_values.append(metric) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_classification3d_array.pth") + print("saved new best metric model") + print( + "current epoch: {} current accuracy: {:.4f} best accuracy: {:.4f} at epoch {}".format( + epoch + 1, metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_accuracy", metric, epoch + 1) + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + main() diff --git a/3d_classification/torch/densenet_training_dict.py b/3d_classification/torch/densenet_training_dict.py new file mode 100644 index 0000000000..6d4a590e87 --- /dev/null +++ b/3d_classification/torch/densenet_training_dict.py @@ -0,0 +1,155 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys + +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.metrics import compute_roc_auc +from monai.transforms import AddChanneld, Compose, LoadNiftid, RandRotate90d, Resized, ScaleIntensityd, ToTensord + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # IXI dataset as a demo, downloadable from https://brain-development.org/ixi-dataset/ + images = [ + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI314-IOP-0889-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI249-Guys-1072-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI609-HH-2600-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI173-HH-1590-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI020-Guys-0700-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI342-Guys-0909-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI134-Guys-0780-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI577-HH-2661-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI066-Guys-0731-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI130-HH-1528-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI607-Guys-1097-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI175-HH-1570-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI385-HH-2078-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI344-Guys-0905-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI409-Guys-0960-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI584-Guys-1129-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI253-HH-1694-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI092-HH-1436-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI574-IOP-1156-T1.nii.gz"]), + os.sep.join(["workspace", "data", "medical", "ixi", "IXI-T1", "IXI585-Guys-1130-T1.nii.gz"]), + ] + + # 2 binary labels for gender classification: man and woman + labels = np.array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=np.int64) + train_files = [{"img": img, "label": label} for img, label in zip(images[:10], labels[:10])] + val_files = [{"img": img, "label": label} for img, label in zip(images[-10:], labels[-10:])] + + # Define transforms for image + train_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + RandRotate90d(keys=["img"], prob=0.8, spatial_axes=[0, 2]), + ToTensord(keys=["img"]), + ] + ) + val_transforms = Compose( + [ + LoadNiftid(keys=["img"]), + AddChanneld(keys=["img"]), + ScaleIntensityd(keys=["img"]), + Resized(keys=["img"], spatial_size=(96, 96, 96)), + ToTensord(keys=["img"]), + ] + ) + + # Define dataset, data loader + check_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + 
check_loader = DataLoader(check_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + check_data = monai.utils.misc.first(check_loader) + print(check_data["img"].shape, check_data["label"]) + + # create a training data loader + train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=4, pin_memory=torch.cuda.is_available()) + + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=2, num_workers=4, pin_memory=torch.cuda.is_available()) + + # Create DenseNet121, CrossEntropyLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=1, out_channels=2).to(device) + loss_function = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), 1e-5) + + # start a typical PyTorch training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + writer = SummaryWriter() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["label"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + y_pred = torch.tensor([], dtype=torch.float32, device=device) + y = torch.tensor([], dtype=torch.long, device=device) + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["label"].to(device) + y_pred = torch.cat([y_pred, model(val_images)], dim=0) + y = torch.cat([y, val_labels], dim=0) + + acc_value = torch.eq(y_pred.argmax(dim=1), y) + acc_metric = acc_value.sum().item() / len(acc_value) + auc_metric = compute_roc_auc(y_pred, y, to_onehot_y=True, softmax=True) + if acc_metric > best_metric: + best_metric = acc_metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_classification3d_dict.pth") + print("saved new best metric model") + print( + "current epoch: {} current accuracy: {:.4f} current AUC: {:.4f} best accuracy: {:.4f} at epoch {}".format( + epoch + 1, acc_metric, auc_metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_accuracy", acc_metric, epoch + 1) + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + main() diff --git a/brats_segmentation_3d.ipynb b/3d_segmentation/brats_segmentation_3d.ipynb similarity index 99% rename from brats_segmentation_3d.ipynb rename to 3d_segmentation/brats_segmentation_3d.ipynb index 60f6c686e3..b8ea848d14 100644 --- a/brats_segmentation_3d.ipynb +++ b/3d_segmentation/brats_segmentation_3d.ipynb @@ -27,7 +27,7 @@ "Below figure shows image patches with the tumor sub-regions that are annotated in the different modalities (top left) and the final labels for the whole dataset (right).\n", "(Figure taken from the [BraTS IEEE 
TMI paper](https://ieeexplore.ieee.org/document/6975210/))\n", "\n", - "![image](./images/brats_tasks.png)\n", + "![image](../figures/brats_tasks.png)\n", "\n", "The image patches show from left to right:\n", "1. the whole tumor (yellow) visible in T2-FLAIR (Fig.A).\n", @@ -35,7 +35,7 @@ "1. the enhancing tumor structures (light blue) visible in T1Gd, surrounding the cystic/necrotic components of the core (green) (Fig. C).\n", "1. The segmentations are combined to generate the final labels of the tumor sub-regions (Fig.D): edema (yellow), non-enhancing solid core (red), necrotic/cystic core (green), enhancing core (blue).\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/brats_segmentation_3d.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/3d_segmentation/brats_segmentation_3d.ipynb)" ] }, { diff --git a/3d_segmentation/ignite/unet_evaluation_array.py b/3d_segmentation/ignite/unet_evaluation_array.py new file mode 100644 index 0000000000..20d3661d97 --- /dev/null +++ b/3d_segmentation/ignite/unet_evaluation_array.py @@ -0,0 +1,113 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
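# Editor's note -- illustrative sketch only, not part of the PR's files: the evaluation script
# below wraps sliding-window inference in an ignite Engine. An Engine simply calls its process
# function once per batch and keeps the latest return value in engine.state.output; the MONAI
# handlers used below (MeanDice, StatsHandler, SegmentationSaver, CheckpointLoader) are then
# attached to that engine. A minimal, self-contained illustration of the same pattern
# (the names _toy_process / toy_evaluator are invented for this sketch):

import torch
from ignite.engine import Engine


def _toy_process(engine, batch):
    # any callable with signature (engine, batch) -> output can drive an Engine
    return batch.mean()


toy_evaluator = Engine(_toy_process)
toy_state = toy_evaluator.run([torch.ones(4), torch.zeros(4)])  # any iterable of batches
print(toy_state.output)  # tensor(0.) -- the output produced for the last batch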
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from ignite.engine import Engine +from torch.utils.data import DataLoader + +from monai import config +from monai.data import NiftiDataset, create_test_image_3d +from monai.handlers import CheckpointLoader, MeanDice, SegmentationSaver, StatsHandler +from monai.inferers import sliding_window_inference +from monai.networks import predict_segmentation +from monai.networks.nets import UNet +from monai.transforms import AddChannel, Compose, ScaleIntensity, ToTensor + + +def main(tempdir): + config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + + # define transforms for image and segmentation + imtrans = Compose([ScaleIntensity(), AddChannel(), ToTensor()]) + segtrans = Compose([AddChannel(), ToTensor()]) + ds = NiftiDataset(images, segs, transform=imtrans, seg_transform=segtrans, image_only=False) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + net = UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ) + net.to(device) + + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + + def _sliding_window_processor(engine, batch): + net.eval() + with torch.no_grad(): + val_images, val_labels = batch[0].to(device), batch[1].to(device) + seg_probs = sliding_window_inference(val_images, roi_size, sw_batch_size, net) + return seg_probs, val_labels + + evaluator = Engine(_sliding_window_processor) + + # add evaluation metric to the evaluator engine + MeanDice(sigmoid=True, to_onehot_y=False).attach(evaluator, "Mean_Dice") + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't need to print loss for evaluator, so just print metrics, user can also customize print functions + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + ) + val_stats_handler.attach(evaluator) + + # for the array data format, assume the 3rd item of batch data is the meta_data + file_saver = SegmentationSaver( + output_dir="tempdir", + output_ext=".nii.gz", + output_postfix="seg", + name="evaluator", + batch_transform=lambda x: x[2], + output_transform=lambda output: predict_segmentation(output[0]), + ) + file_saver.attach(evaluator) + + # the model was trained by "unet_training_array" example + ckpt_saver = CheckpointLoader(load_path="./runs_array/net_checkpoint_100.pth", load_dict={"net": net}) + ckpt_saver.attach(evaluator) + + # sliding window inference for one image at every iteration + loader = DataLoader(ds, batch_size=1, num_workers=1, pin_memory=torch.cuda.is_available()) + state = evaluator.run(loader) + print(state) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/ignite/unet_evaluation_dict.py 
b/3d_segmentation/ignite/unet_evaluation_dict.py new file mode 100644 index 0000000000..5dbc305661 --- /dev/null +++ b/3d_segmentation/ignite/unet_evaluation_dict.py @@ -0,0 +1,119 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from ignite.engine import Engine +from torch.utils.data import DataLoader + +import monai +from monai.data import create_test_image_3d, list_data_collate +from monai.handlers import CheckpointLoader, MeanDice, SegmentationSaver, StatsHandler +from monai.inferers import sliding_window_inference +from monai.networks import predict_segmentation +from monai.networks.nets import UNet +from monai.transforms import AsChannelFirstd, Compose, LoadNiftid, ScaleIntensityd, ToTensord + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + val_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + net = UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ) + net.to(device) + + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + + def _sliding_window_processor(engine, batch): + net.eval() + with torch.no_grad(): + val_images, val_labels = batch["img"].to(device), batch["seg"].to(device) + seg_probs = sliding_window_inference(val_images, roi_size, sw_batch_size, net) + return seg_probs, val_labels + + evaluator = Engine(_sliding_window_processor) + + # add evaluation metric to the evaluator engine + MeanDice(sigmoid=True, to_onehot_y=False).attach(evaluator, "Mean_Dice") + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't need to print loss for evaluator, so just print metrics, user can also customize print functions + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + ) + 
val_stats_handler.attach(evaluator) + + # convert the necessary metadata from batch data + SegmentationSaver( + output_dir="tempdir", + output_ext=".nii.gz", + output_postfix="seg", + name="evaluator", + batch_transform=lambda batch: batch["img_meta_dict"], + output_transform=lambda output: predict_segmentation(output[0]), + ).attach(evaluator) + # the model was trained by "unet_training_dict" example + CheckpointLoader(load_path="./runs_dict/net_checkpoint_50.pth", load_dict={"net": net}).attach(evaluator) + + # sliding window inference for one image at every iteration + val_loader = DataLoader( + val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate, pin_memory=torch.cuda.is_available() + ) + state = evaluator.run(val_loader) + print(state) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/ignite/unet_training_array.py b/3d_segmentation/ignite/unet_training_array.py new file mode 100644 index 0000000000..bf4c9d21e8 --- /dev/null +++ b/3d_segmentation/ignite/unet_training_array.py @@ -0,0 +1,160 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer +from ignite.handlers import EarlyStopping, ModelCheckpoint +from torch.utils.data import DataLoader + +import monai +from monai.data import NiftiDataset, create_test_image_3d +from monai.handlers import ( + MeanDice, + StatsHandler, + TensorBoardImageHandler, + TensorBoardStatsHandler, + stopping_fn_from_metric, +) +from monai.networks import predict_segmentation +from monai.transforms import AddChannel, Compose, RandSpatialCrop, Resize, ScaleIntensity, ToTensor + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + + # define transforms for image and segmentation + train_imtrans = Compose( + [ScaleIntensity(), AddChannel(), RandSpatialCrop((96, 96, 96), random_size=False), ToTensor()] + ) + train_segtrans = Compose([AddChannel(), RandSpatialCrop((96, 96, 96), random_size=False), ToTensor()]) + val_imtrans = Compose([ScaleIntensity(), AddChannel(), Resize((96, 96, 96)), ToTensor()]) + val_segtrans = Compose([AddChannel(), Resize((96, 96, 96)), ToTensor()]) + + # define nifti dataset, data loader + check_ds = 
NiftiDataset(images, segs, transform=train_imtrans, seg_transform=train_segtrans) + check_loader = DataLoader(check_ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available()) + im, seg = monai.utils.misc.first(check_loader) + print(im.shape, seg.shape) + + # create a training data loader + train_ds = NiftiDataset(images[:20], segs[:20], transform=train_imtrans, seg_transform=train_segtrans) + train_loader = DataLoader(train_ds, batch_size=5, shuffle=True, num_workers=8, pin_memory=torch.cuda.is_available()) + # create a validation data loader + val_ds = NiftiDataset(images[-20:], segs[-20:], transform=val_imtrans, seg_transform=val_segtrans) + val_loader = DataLoader(val_ds, batch_size=5, num_workers=8, pin_memory=torch.cuda.is_available()) + + # create UNet, DiceLoss and Adam optimizer + net = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ) + loss = monai.losses.DiceLoss(sigmoid=True) + lr = 1e-3 + opt = torch.optim.Adam(net.parameters(), lr) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration, + # user can add output_transform to return other values, like: y_pred, y, etc. + trainer = create_supervised_trainer(net, opt, loss, device, False) + + # adding checkpoint handler to save models (network params and optimizer stats) during training + checkpoint_handler = ModelCheckpoint("./runs_array/", "net", n_saved=10, require_empty=False) + trainer.add_event_handler( + event_name=Events.EPOCH_COMPLETED, handler=checkpoint_handler, to_save={"net": net, "opt": opt} + ) + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't set metrics for trainer here, so just print loss, user can also customize print functions + # and can use output_transform to convert engine.state.output if it's not a loss value + train_stats_handler = StatsHandler(name="trainer") + train_stats_handler.attach(trainer) + + # TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler + train_tensorboard_stats_handler = TensorBoardStatsHandler() + train_tensorboard_stats_handler.attach(trainer) + + validation_every_n_epochs = 1 + # Set parameters for validation + metric_name = "Mean_Dice" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: MeanDice(sigmoid=True, to_onehot_y=False)} + + # Ignite evaluator expects batch=(img, seg) and returns output=(y_pred, y) at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True) + + @trainer.on(Events.EPOCH_COMPLETED(every=validation_every_n_epochs)) + def run_validation(engine): + evaluator.run(val_loader) + + # add early stopping handler to evaluator + early_stopper = EarlyStopping(patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer) + evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=early_stopper) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_stats_handler.attach(evaluator) + + # add handler to record metrics to 
TensorBoard at every validation epoch + val_tensorboard_stats_handler = TensorBoardStatsHandler( + output_transform=lambda x: None, # no need to plot loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_tensorboard_stats_handler.attach(evaluator) + + # add handler to draw the first image and the corresponding label and model output in the last batch + # here we draw the 3D output as GIF format along Depth axis, at every validation epoch + val_tensorboard_image_handler = TensorBoardImageHandler( + batch_transform=lambda batch: (batch[0], batch[1]), + output_transform=lambda output: predict_segmentation(output[0]), + global_iter_transform=lambda x: trainer.state.epoch, + ) + evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=val_tensorboard_image_handler) + + train_epochs = 30 + state = trainer.run(train_loader, train_epochs) + print(state) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/ignite/unet_training_dict.py b/3d_segmentation/ignite/unet_training_dict.py new file mode 100644 index 0000000000..fcdce7efdd --- /dev/null +++ b/3d_segmentation/ignite/unet_training_dict.py @@ -0,0 +1,200 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
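# Editor's note -- illustrative sketch only, not part of the PR's files: the dictionary-based
# training script below relies on RandCropByPosNegLabeld(num_samples=4) returning a *list* of
# cropped samples per loaded item, and on list_data_collate flattening those lists, so a
# DataLoader batch_size of 2 yields 2 x 4 = 8 patches of size 96^3 per iteration. A small
# stand-alone illustration of the collate behaviour (dummy tensors, shapes chosen for the example):

import torch
from monai.data import list_data_collate

patch = {"img": torch.zeros(1, 96, 96, 96), "seg": torch.zeros(1, 96, 96, 96)}
item_a = [dict(patch) for _ in range(4)]  # what the random crop returns for one loaded item
item_b = [dict(patch) for _ in range(4)]  # ... and for a second item
batch = list_data_collate([item_a, item_b])
print(batch["img"].shape)  # torch.Size([8, 1, 96, 96, 96]) -- 2 items x 4 crops each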
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from ignite.engine import Events, _prepare_batch, create_supervised_evaluator, create_supervised_trainer +from ignite.handlers import EarlyStopping, ModelCheckpoint +from torch.utils.data import DataLoader + +import monai +from monai.data import create_test_image_3d, list_data_collate +from monai.handlers import ( + MeanDice, + StatsHandler, + TensorBoardImageHandler, + TensorBoardStatsHandler, + stopping_fn_from_metric, +) +from monai.networks import predict_segmentation +from monai.transforms import ( + AsChannelFirstd, + Compose, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images[:20], segs[:20])] + val_files = [{"img": img, "seg": seg} for img, seg in zip(images[-20:], segs[-20:])] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["img", "seg"]), + ] + ) + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + + # define dataset, data loader + check_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + check_loader = DataLoader( + check_ds, batch_size=2, num_workers=4, collate_fn=list_data_collate, pin_memory=torch.cuda.is_available() + ) + check_data = monai.utils.misc.first(check_loader) + print(check_data["img"].shape, check_data["seg"].shape) + + # create a training data loader + train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=True, + num_workers=4, + collate_fn=list_data_collate, + pin_memory=torch.cuda.is_available(), + ) + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader( + val_ds, batch_size=5, num_workers=8, collate_fn=list_data_collate, pin_memory=torch.cuda.is_available() + ) + + # create UNet, DiceLoss and Adam optimizer + net = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + 
strides=(2, 2, 2, 2), + num_res_units=2, + ) + loss = monai.losses.DiceLoss(sigmoid=True) + lr = 1e-3 + opt = torch.optim.Adam(net.parameters(), lr) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration, + # user can add output_transform to return other values, like: y_pred, y, etc. + def prepare_batch(batch, device=None, non_blocking=False): + return _prepare_batch((batch["img"], batch["seg"]), device, non_blocking) + + trainer = create_supervised_trainer(net, opt, loss, device, False, prepare_batch=prepare_batch) + + # adding checkpoint handler to save models (network params and optimizer stats) during training + checkpoint_handler = ModelCheckpoint("./runs_dict/", "net", n_saved=10, require_empty=False) + trainer.add_event_handler( + event_name=Events.EPOCH_COMPLETED, handler=checkpoint_handler, to_save={"net": net, "opt": opt} + ) + + # StatsHandler prints loss at every iteration and print metrics at every epoch, + # we don't set metrics for trainer here, so just print loss, user can also customize print functions + # and can use output_transform to convert engine.state.output if it's not loss value + train_stats_handler = StatsHandler(name="trainer") + train_stats_handler.attach(trainer) + + # TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler + train_tensorboard_stats_handler = TensorBoardStatsHandler() + train_tensorboard_stats_handler.attach(trainer) + + validation_every_n_iters = 5 + # set parameters for validation + metric_name = "Mean_Dice" + # add evaluation metric to the evaluator engine + val_metrics = {metric_name: MeanDice(sigmoid=True, to_onehot_y=False)} + + # Ignite evaluator expects batch=(img, seg) and returns output=(y_pred, y) at every iteration, + # user can add output_transform to return other values + evaluator = create_supervised_evaluator(net, val_metrics, device, True, prepare_batch=prepare_batch) + + @trainer.on(Events.ITERATION_COMPLETED(every=validation_every_n_iters)) + def run_validation(engine): + evaluator.run(val_loader) + + # add early stopping handler to evaluator + early_stopper = EarlyStopping(patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer) + evaluator.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=early_stopper) + + # add stats event handler to print validation stats via evaluator + val_stats_handler = StatsHandler( + name="evaluator", + output_transform=lambda x: None, # no need to print loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.epoch, + ) # fetch global epoch number from trainer + val_stats_handler.attach(evaluator) + + # add handler to record metrics to TensorBoard at every validation epoch + val_tensorboard_stats_handler = TensorBoardStatsHandler( + output_transform=lambda x: None, # no need to plot loss value, so disable per iteration output + global_epoch_transform=lambda x: trainer.state.iteration, + ) # fetch global iteration number from trainer + val_tensorboard_stats_handler.attach(evaluator) + + # add handler to draw the first image and the corresponding label and model output in the last batch + # here we draw the 3D output as GIF format along the depth axis, every 2 validation iterations. 
+ val_tensorboard_image_handler = TensorBoardImageHandler( + batch_transform=lambda batch: (batch["img"], batch["seg"]), + output_transform=lambda output: predict_segmentation(output[0]), + global_iter_transform=lambda x: trainer.state.epoch, + ) + evaluator.add_event_handler(event_name=Events.ITERATION_COMPLETED(every=2), handler=val_tensorboard_image_handler) + + train_epochs = 5 + state = trainer.run(train_loader, train_epochs) + print(state) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/spleen_segmentation_3d.ipynb b/3d_segmentation/spleen_segmentation_3d.ipynb similarity index 99% rename from spleen_segmentation_3d.ipynb rename to 3d_segmentation/spleen_segmentation_3d.ipynb index ee1d2d733f..a9b151140d 100644 --- a/spleen_segmentation_3d.ipynb +++ b/3d_segmentation/spleen_segmentation_3d.ipynb @@ -29,7 +29,7 @@ "Source: Memorial Sloan Kettering Cancer Center \n", "Challenge: Large ranging foreground size\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/spleen_segmentation_3d.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/3d_segmentation/spleen_segmentation_3d.ipynb)" ] }, { @@ -765,7 +765,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/spleen_segmentation_3d_lightning.ipynb b/3d_segmentation/spleen_segmentation_3d_lightning.ipynb similarity index 99% rename from spleen_segmentation_3d_lightning.ipynb rename to 3d_segmentation/spleen_segmentation_3d_lightning.ipynb index 4128300909..648a7f26ef 100644 --- a/spleen_segmentation_3d_lightning.ipynb +++ b/3d_segmentation/spleen_segmentation_3d_lightning.ipynb @@ -34,7 +34,7 @@ "Source: Memorial Sloan Kettering Cancer Center \n", "Challenge: Large ranging foreground size\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/spleen_segmentation_3d_lightning.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/3d_segmentation/spleen_segmentation_3d_lightning.ipynb)" ] }, { @@ -708,7 +708,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/3d_segmentation/torch/unet_evaluation_array.py b/3d_segmentation/torch/unet_evaluation_array.py new file mode 100644 index 0000000000..09fcf42103 --- /dev/null +++ b/3d_segmentation/torch/unet_evaluation_array.py @@ -0,0 +1,89 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
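# Editor's note -- illustrative sketch only, not part of the PR's files: the 3D evaluation and
# training scripts in this PR validate with monai.inferers.sliding_window_inference, which tiles
# the full volume into roi_size windows, runs the network on sw_batch_size windows at a time and
# stitches the window predictions back to the input's spatial shape, so the whole 128^3 volume
# never has to pass through the UNet in one shot. A minimal stand-alone call (_fake_net is a
# placeholder predictor used only to keep the sketch runnable):

import torch
from monai.inferers import sliding_window_inference


def _fake_net(x):
    # stand-in predictor: returns one channel with the same spatial size as its input window
    return x * 0.5


volume = torch.rand(1, 1, 128, 128, 128)  # (batch, channel, D, H, W)
pred = sliding_window_inference(volume, (96, 96, 96), 4, _fake_net)
print(pred.shape)  # torch.Size([1, 1, 128, 128, 128]) -- same spatial shape as the input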
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from torch.utils.data import DataLoader + +from monai import config +from monai.data import NiftiDataset, NiftiSaver, create_test_image_3d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.networks.nets import UNet +from monai.transforms import AddChannel, Compose, ScaleIntensity, ToTensor + + +def main(tempdir): + config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + + # define transforms for image and segmentation + imtrans = Compose([ScaleIntensity(), AddChannel(), ToTensor()]) + segtrans = Compose([AddChannel(), ToTensor()]) + val_ds = NiftiDataset(images, segs, transform=imtrans, seg_transform=segtrans, image_only=False) + # sliding window inference for one image at every iteration + val_loader = DataLoader(val_ds, batch_size=1, num_workers=1, pin_memory=torch.cuda.is_available()) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + + model.load_state_dict(torch.load("best_metric_model_segmentation3d_array.pth")) + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + saver = NiftiSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + val_outputs = (val_outputs.sigmoid() >= 0.5).float() + saver.save_batch(val_outputs, val_data[2]) + metric = metric_sum / metric_count + print("evaluation metric:", metric) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/torch/unet_evaluation_dict.py b/3d_segmentation/torch/unet_evaluation_dict.py new file mode 100644 index 0000000000..cc39e82232 --- /dev/null +++ b/3d_segmentation/torch/unet_evaluation_dict.py @@ -0,0 +1,103 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from torch.utils.data import DataLoader + +import monai +from monai.data import NiftiSaver, create_test_image_3d, list_data_collate +from monai.engines import get_devices_spec +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.networks.nets import UNet +from monai.transforms import AsChannelFirstd, Compose, LoadNiftid, ScaleIntensityd, ToTensord + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(5): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + val_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + # sliding window inference need to input 1 image in every iteration + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # try to use all the available GPUs + devices = get_devices_spec(None) + model = UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(devices[0]) + + model.load_state_dict(torch.load("best_metric_model_segmentation3d_dict.pth")) + + # if we have multiple GPUs, set data parallel to execute sliding window inference + if len(devices) > 1: + model = torch.nn.DataParallel(model, device_ids=devices) + + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + saver = NiftiSaver(output_dir="./output") + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(devices[0]), val_data["seg"].to(devices[0]) + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + val_outputs = (val_outputs.sigmoid() >= 0.5).float() + saver.save_batch(val_outputs, val_data["img_meta_dict"]) + metric = metric_sum / metric_count + print("evaluation metric:", metric) + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/torch/unet_training_array.py b/3d_segmentation/torch/unet_training_array.py new file mode 100644 index 0000000000..1fc82089a9 --- /dev/null +++ b/3d_segmentation/torch/unet_training_array.py @@ -0,0 +1,167 @@ +# Copyright 2020 MONAI Consortium +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.data import NiftiDataset, create_test_image_3d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import AddChannel, Compose, RandRotate90, RandSpatialCrop, ScaleIntensity, ToTensor +from monai.visualize import plot_2d_or_3d_image + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "im*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + + # define transforms for image and segmentation + train_imtrans = Compose( + [ + ScaleIntensity(), + AddChannel(), + RandSpatialCrop((96, 96, 96), random_size=False), + RandRotate90(prob=0.5, spatial_axes=(0, 2)), + ToTensor(), + ] + ) + train_segtrans = Compose( + [ + AddChannel(), + RandSpatialCrop((96, 96, 96), random_size=False), + RandRotate90(prob=0.5, spatial_axes=(0, 2)), + ToTensor(), + ] + ) + val_imtrans = Compose([ScaleIntensity(), AddChannel(), ToTensor()]) + val_segtrans = Compose([AddChannel(), ToTensor()]) + + # define nifti dataset, data loader + check_ds = NiftiDataset(images, segs, transform=train_imtrans, seg_transform=train_segtrans) + check_loader = DataLoader(check_ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available()) + im, seg = monai.utils.misc.first(check_loader) + print(im.shape, seg.shape) + + # create a training data loader + train_ds = NiftiDataset(images[:20], segs[:20], transform=train_imtrans, seg_transform=train_segtrans) + train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=8, pin_memory=torch.cuda.is_available()) + # create a validation data loader + val_ds = NiftiDataset(images[-20:], segs[-20:], transform=val_imtrans, seg_transform=val_segtrans) + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, pin_memory=torch.cuda.is_available()) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True) + 
optimizer = torch.optim.Adam(model.parameters(), 1e-3) + + # start a typical PyTorch training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + epoch_loss_values = list() + metric_values = list() + writer = SummaryWriter() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data[0].to(device), batch_data[1].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + val_images = None + val_labels = None + val_outputs = None + for val_data in val_loader: + val_images, val_labels = val_data[0].to(device), val_data[1].to(device) + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + metric = metric_sum / metric_count + metric_values.append(metric) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_segmentation3d_array.pth") + print("saved new best metric model") + print( + "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format( + epoch + 1, metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_mean_dice", metric, epoch + 1) + # plot the last model output as GIF image in TensorBoard with the corresponding image and label + plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image") + plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label") + plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output") + + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/3d_segmentation/torch/unet_training_dict.py b/3d_segmentation/torch/unet_training_dict.py new file mode 100644 index 0000000000..381be34409 --- /dev/null +++ b/3d_segmentation/torch/unet_training_dict.py @@ -0,0 +1,187 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
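# Editor's note -- illustrative sketch only, not part of the PR's files: the UNet in these
# segmentation scripts has out_channels=1 and returns raw logits, which is why DiceLoss and
# DiceMetric are constructed with sigmoid=True -- the sigmoid is applied inside the loss/metric
# rather than in the network. A tiny self-contained check of that behaviour (random tensors only):

import torch
from monai.losses import DiceLoss

dice_loss = DiceLoss(sigmoid=True)  # sigmoid applied internally to the raw logits
logits = torch.randn(2, 1, 96, 96, 96, requires_grad=True)  # (batch, channel, D, H, W)
target = torch.randint(0, 2, (2, 1, 96, 96, 96)).float()
loss = dice_loss(logits, target)
loss.backward()  # gradients flow back to the logits, as in the training loops in these scripts
print(loss.item())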
+ +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter + +import monai +from monai.data import create_test_image_3d, list_data_collate +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import ( + AsChannelFirstd, + Compose, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) +from monai.visualize import plot_2d_or_3d_image + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz")) + + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images[:20], segs[:20])] + val_files = [{"img": img, "seg": seg} for img, seg in zip(images[-20:], segs[-20:])] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["img", "seg"]), + ] + ) + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + + # define dataset, data loader + check_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + check_loader = DataLoader(check_ds, batch_size=2, num_workers=4, collate_fn=list_data_collate) + check_data = monai.utils.misc.first(check_loader) + print(check_data["img"].shape, check_data["seg"].shape) + + # create a training data loader + train_ds = monai.data.Dataset(data=train_files, transform=train_transforms) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=True, + num_workers=4, + collate_fn=list_data_collate, + pin_memory=torch.cuda.is_available(), + ) + # create a validation data loader + val_ds = monai.data.Dataset(data=val_files, transform=val_transforms) + val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, collate_fn=list_data_collate) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = 
monai.losses.DiceLoss(sigmoid=True) + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + + # start a typical PyTorch training + val_interval = 2 + best_metric = -1 + best_metric_epoch = -1 + epoch_loss_values = list() + metric_values = list() + writer = SummaryWriter() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step) + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + metric_sum = 0.0 + metric_count = 0 + val_images = None + val_labels = None + val_outputs = None + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device) + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels) + metric_count += len(value) + metric_sum += value.item() * len(value) + metric = metric_sum / metric_count + metric_values.append(metric) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + torch.save(model.state_dict(), "best_metric_model_segmentation3d_dict.pth") + print("saved new best metric model") + print( + "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format( + epoch + 1, metric, best_metric, best_metric_epoch + ) + ) + writer.add_scalar("val_mean_dice", metric, epoch + 1) + # plot the last model output as GIF image in TensorBoard with the corresponding image and label + plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image") + plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label") + plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output") + + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + writer.close() + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/unet_segmentation_3d_catalyst.ipynb b/3d_segmentation/unet_segmentation_3d_catalyst.ipynb similarity index 99% rename from unet_segmentation_3d_catalyst.ipynb rename to 3d_segmentation/unet_segmentation_3d_catalyst.ipynb index a4ffebdd76..130cf87570 100644 --- a/unet_segmentation_3d_catalyst.ipynb +++ b/3d_segmentation/unet_segmentation_3d_catalyst.ipynb @@ -24,7 +24,7 @@ "\n", "This tutorial is based on [unet_training_dict.py](https://github.com/Project-MONAI/MONAI/blob/master/examples/segmentation_3d/unet_training_dict.py) and [spleen_segmentation_3d.ipynb](https://github.com/Project-MONAI/Tutorials/blob/master/spleen_segmentation_3d.ipynb).\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/unet_segmentation_3d_catalyst.ipynb)" + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_catalyst.ipynb)" ] }, { @@ -653,7 +653,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/unet_segmentation_3d_ignite.ipynb b/3d_segmentation/unet_segmentation_3d_ignite.ipynb similarity index 99% rename from unet_segmentation_3d_ignite.ipynb rename to 3d_segmentation/unet_segmentation_3d_ignite.ipynb index b67bbef971..0ef31ca87b 100644 --- a/unet_segmentation_3d_ignite.ipynb +++ b/3d_segmentation/unet_segmentation_3d_ignite.ipynb @@ -6,7 +6,7 @@ "source": [ "# 3D Segmentation with UNet\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/unet_segmentation_3d_ignite.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_ignite.ipynb)" ] }, { @@ -478,7 +478,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/README.md b/README.md index 91d178d644..4f619ed50a 100644 --- a/README.md +++ b/README.md @@ -18,65 +18,104 @@ Or install all optional requirements by: ``` pip install -r https://raw.githubusercontent.com/Project-MONAI/MONAI/master/requirements-dev.txt ``` -### 2. List of notebooks -#### [3d_image_transforms](./3d_image_transforms.ipynb) -This notebook demonstrates the transformations on volumetric images. -#### [automatic_mixed_precision](./automatic_mixed_precision.ipynb) +### 2. List of notebooks and examples +**2D classification** +#### [mednist_tutorial](./2d_classification/mednist_tutorial.ipynb) +This notebook shows how to easily integrate MONAI features into existing PyTorch programs. +It's based on the MedNIST dataset which is very suitable for beginners as a tutorial. +The content is also available as [a Colab tutorial](https://colab.research.google.com/drive/1wy8XUSnNWlhDNazFdvGBHLfdkGvOHBKe). + +**2D segmentation** +#### [torch examples](./2d_segmentation/torch) +Training and evaluation examples of 2D segmentation based on UNet and synthetic dataset. +The examples are standard PyTorch programs and have both dictionary-based and array-based versions. + +**3D classification** +#### [ignite examples](./3d_classification/ignite) +Training and evaluation examples of 3D classification based on DenseNet3D and [IXI dataset](https://brain-development.org/ixi-dataset). +The examples are PyTorch Ignite programs and have both dictionary-based and array-based transformation versions. +#### [torch examples](./3d_classification/torch) +Training and evaluation examples of 3D classification based on DenseNet3D and [IXI dataset](https://brain-development.org/ixi-dataset). +The examples are standard PyTorch programs and have both dictionary-based and array-based transformation versions. + +**3D segmentation** +#### [ignite examples](./3d_segmentation/ignite) +Training and evaluation examples of 3D segmentation based on UNet3D and synthetic dataset. +The examples are PyTorch Ignite programs and have both dictionary-base and array-based transformations. 
+#### [torch examples](./3d_segmentation/torch)
+Training and evaluation examples of 3D segmentation based on UNet3D and synthetic dataset.
+The examples are standard PyTorch programs and have both dictionary-based and array-based versions.
+#### [brats_segmentation_3d](./3d_segmentation/brats_segmentation_3d.ipynb)
+This tutorial shows how to construct a training workflow of multi-labels segmentation task based on [MSD Brain Tumor dataset](http://medicaldecathlon.com).
+#### [spleen_segmentation_3d_lightning](./3d_segmentation/spleen_segmentation_3d_lightning.ipynb)
+This notebook shows how MONAI may be used in conjunction with the [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning) framework.
+#### [spleen_segmentation_3d](./3d_segmentation/spleen_segmentation_3d.ipynb)
+This notebook is an end-to-end training and evaluation example of 3D segmentation based on [MSD Spleen dataset](http://medicaldecathlon.com).
+The example shows the flexibility of MONAI modules in a PyTorch-based program:
+- Transforms for dictionary-based training data structure.
+- Load NIfTI images with metadata.
+- Scale medical image intensity with expected range.
+- Crop out a batch of balanced image patch samples based on positive / negative label ratio.
+- Cache IO and transforms to accelerate training and validation.
+- 3D UNet, Dice loss function, Mean Dice metric for 3D segmentation task.
+- Sliding window inference.
+- Deterministic training for reproducibility.
+#### [unet_segmentation_3d_catalyst](./3d_segmentation/unet_segmentation_3d_catalyst.ipynb)
+This notebook shows how MONAI may be used in conjunction with the [Catalyst](https://github.com/catalyst-team/catalyst) framework.
+#### [unet_segmentation_3d_ignite](./3d_segmentation/unet_segmentation_3d_ignite.ipynb)
+This notebook is an end-to-end training & evaluation example of 3D segmentation based on synthetic dataset.
+The example is a PyTorch Ignite program and shows several key features of MONAI, especially with medical domain specific transforms and event handlers.
+
+**acceleration**
+#### [distributed_training](./acceleration/distributed_training)
+The examples show how to execute distributed training and evaluation based on 3 different frameworks:
+- PyTorch native `DistributedDataParallel` module with `torch.distributed.launch`.
+- Horovod APIs with `horovodrun`.
+- PyTorch Ignite and MONAI workflows.
+
+They can run on several distributed nodes with multiple GPU devices on every node.
+#### [automatic_mixed_precision](./acceleration/automatic_mixed_precision.ipynb)
 This tutorial shows how to apply the automatic mixed precision(AMP) feature of PyTorch into training and evaluation programs.
 And compares the training speed and memory usage with/without AMP.
-#### [brats_segmentation_3d](./brats_segmentation_3d.ipynb)
-This tutorial shows how to construct a training workflow of multi-labels segmentation task based on [MSD Brain Tumor dataset](http://medicaldecathlon.com).
-#### [dataset_type_performance](./dataset_type_performance.ipynb)
+#### [dataset_type_performance](./acceleration/dataset_type_performance.ipynb)
 This notebook compares the performance of `Dataset`, `CacheDataset` and `PersistentDataset`.
 These classes differ in how data is stored (in memory or on disk), and at which moment transforms are applied.
-#### [fast_training_tutorial](./fast_training_tutorial.ipynb)
+#### [fast_training_tutorial](./acceleration/fast_training_tutorial.ipynb)
 This tutorial compares the training performance of pure PyTorch program and optimized program in MONAI based on NVIDIA GPU device and latest CUDA library.
 The optimization methods mainly include: `AMP`, `CacheDataset` and `Novograd`.
-#### [integrate_3rd_party_transforms](./integrate_3rd_party_transforms.ipynb)
+#### [multi_gpu_test](./acceleration/multi_gpu_test.ipynb)
+This notebook is a quick demo that runs the Ignite trainer engine on CPU, GPU and multiple GPUs.
+#### [transform_speed](./acceleration/transform_speed.ipynb)
+Illustrate reading NIfTI files and test speed of different transforms on different devices.
+
+**modules**
+#### [workflows](./modules/workflows)
+Training and evaluation examples of 3D segmentation based on UNet3D and synthetic dataset, and GAN training and evaluation examples for a medical image generative adversarial network. An easy-to-run training script uses `GanTrainer` to train a 2D CT scan reconstruction network, and the evaluation script generates random samples from a trained network.
+
+The examples are built with MONAI workflows and mainly contain: trainer/evaluator, handlers, post_transforms, etc.
+#### [3d_image_transforms](./modules/3d_image_transforms.ipynb)
+This notebook demonstrates the transformations on volumetric images.
+#### [integrate_3rd_party_transforms](./modules/integrate_3rd_party_transforms.ipynb)
 This tutorial shows how to integrate 3rd party transforms into MONAI program.
 Mainly shows transforms from BatchGenerator, TorchIO, Rising and ITK.
-#### [load_medical_imagesl](./load_medical_images.ipynb)
+#### [load_medical_images](./modules/load_medical_images.ipynb)
 This notebook introduces how to easily load different formats of medical images in MONAI and execute many additional operations.
-#### [mednist_GAN_tutorial](./mednist_GAN_tutorial.ipynb)
+#### [mednist_GAN_tutorial](./modules/mednist_GAN_tutorial.ipynb)
 This notebook illustrates the use of MONAI for training a network to generate images from a random input tensor.
 A simple GAN is employed to do with a separate Generator and Discriminator networks.
-#### [mednist_GAN_workflow](./mednist_GAN_workflow.ipynb)
+#### [mednist_GAN_workflow](./modules/mednist_GAN_workflow.ipynb)
 This notebook shows the `GanTrainer`, a MONAI workflow engine for modularized adversarial learning. Train a medical image reconstruction network using the MedNIST hand CT scan dataset. Based on the tutorial.
-#### [mednist_tutorial](./mednist_tutorial.ipynb)
-This notebook shows how to easily integrate MONAI features into existing PyTorch programs.
-It's based on the MedNIST dataset which is very suitable for beginners as a tutorial.
-The content is also available as [a Colab tutorial](https://colab.research.google.com/drive/1wy8XUSnNWlhDNazFdvGBHLfdkGvOHBKe).
-#### [models_ensemble](./models_ensemble.ipynb)
+#### [models_ensemble](./modules/models_ensemble.ipynb)
 This tutorial shows how to leverage `EnsembleEvaluator`, `MeanEnsemble` and `VoteEnsemble` modules in MONAI to set up ensemble program.
-#### [multi_gpu_test](./multi_gpu_test.ipynb)
-This notebook is a quick demo for devices, run the Ignite trainer engine on CPU, GPU and multiple GPUs.
-#### [nifti_read_example](./nifti_read_example.ipynb) +#### [nifti_read_example](./modules/nifti_read_example.ipynb) Illustrate reading NIfTI files and iterating over image patches of the volumes loaded from them. -#### [dynunet_tutorial](./dynunet_tutorial.ipynb) +#### [dynunet_tutorial](./modules/dynunet_tutorial.ipynb) This tutorial shows how to train 3D segmentation tasks on all the 10 decathlon datasets with the reimplementation of dynUNet in MONAI. -#### [post_transforms](./post_transforms.ipynb) +#### [post_transforms](./modules/post_transforms.ipynb) This notebook shows the usage of several post transforms based on the model output of spleen segmentation task. -#### [public_datasets](./public_datasets.ipynb) +#### [public_datasets](./modules/public_datasets.ipynb) This notebook shows how to quickly set up training workflow based on `MedNISTDataset` and `DecathlonDataset`, and how to create a new dataset. -#### [spleen_segmentation_3d](./spleen_segmentation_3d.ipynb) -This notebook is an end-to-end training and evaluation example of 3D segmentation based on [MSD Spleen dataset](http://medicaldecathlon.com). -The example shows the flexibility of MONAI modules in a PyTorch-based program: -- Transforms for dictionary-based training data structure. -- Load NIfTI images with metadata. -- Scale medical image intensity with expected range. -- Crop out a batch of balanced image patch samples based on positive / negative label ratio. -- Cache IO and transforms to accelerate training and validation. -- 3D UNet, Dice loss function, Mean Dice metric for 3D segmentation task. -- Sliding window inference. -- Deterministic training for reproducibility. -#### [spleen_segmentation_3d_lightning](./spleen_segmentation_3d_lightning.ipynb) -This notebook shows how MONAI may be used in conjunction with the [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning) framework. -#### [unet_segmentation_3d_catalyst](./unet_segmentation_3d_catalyst.ipynb) -This notebook shows how MONAI may be used in conjunction with the [Catalyst](https://github.com/catalyst-team/catalyst) framework. -#### [transform_speed](./transform_speed.ipynb) -Illustrate reading NIfTI files and test speed of different transforms on different devices. -#### [transforms_demo_2d](./transforms_demo_2d.ipynb) +#### [transforms_demo_2d](./modules/transforms_demo_2d.ipynb) This notebook demonstrates the image transformations on histology images using [the GlaS Contest dataset](https://warwick.ac.uk/fac/sci/dcs/research/tia/glascontest/download/). -#### [unet_segmentation_3d_ignite](./unet_segmentation_3d_ignite.ipynb) -This notebook is an end-to-end training & evaluation example of 3D segmentation based on synthetic dataset. -The example is a PyTorch Ignite program and shows several key features of MONAI, especially with medical domain specific transforms and event handlers. 
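The `distributed_training` examples added in this change all follow the same PyTorch `DistributedDataParallel` recipe: initialize the process group, shard the data across ranks with `DistributedSampler`, and wrap the network in `DistributedDataParallel` before running an otherwise ordinary train or evaluate loop. A minimal sketch of that recipe is shown below, assuming a CUDA node with NCCL and the `torch.distributed.launch` launcher; the script name, the toy tensors and the linear model are illustrative stand-ins, not part of the new scripts:

```
# sketch_ddp.py -- minimal sketch of the DDP pattern used by the distributed_training examples
# (illustrative only; the real scripts use MONAI datasets, dictionary transforms and 3D networks)
# launch: python -m torch.distributed.launch --nproc_per_node=NUM_GPUS sketch_ddp.py
import argparse

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler


def main():
    parser = argparse.ArgumentParser()
    # torch.distributed.launch passes --local_rank to every spawned process
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()

    # every GPU runs in its own process; NCCL backend, connection info taken from env vars
    dist.init_process_group(backend="nccl", init_method="env://")
    device = torch.device(f"cuda:{args.local_rank}")

    # toy data: DistributedSampler gives each rank its own shard of the dataset
    ds = TensorDataset(torch.randn(64, 10), torch.randn(64, 1))
    sampler = DistributedSampler(ds)
    loader = DataLoader(ds, batch_size=8, sampler=sampler, pin_memory=True)

    # wrap the model with DistributedDataParallel after moving it to the expected device
    model = DistributedDataParallel(torch.nn.Linear(10, 1).to(device), device_ids=[args.local_rank])
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)
    loss_function = torch.nn.MSELoss()

    for epoch in range(2):
        sampler.set_epoch(epoch)  # reshuffle the shards between epochs
        for x, y in loader:
            optimizer.zero_grad()
            loss = loss_function(model(x.to(device)), y.to(device))
            loss.backward()  # gradients are all-reduced across ranks here
            optimizer.step()

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```

The new `unet_training_ddp.py`, `unet_evaluation_ddp.py` and `brats_training_ddp.py` scripts below fill this skeleton in with synthetic or Decathlon data, dictionary transforms and 3D networks, while the Horovod and MONAI-workflow variants swap the launcher and the engine but keep the same data-sharding idea.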
diff --git a/automatic_mixed_precision.ipynb b/acceleration/automatic_mixed_precision.ipynb similarity index 99% rename from automatic_mixed_precision.ipynb rename to acceleration/automatic_mixed_precision.ipynb index 7d63e2bd96..043e169ec1 100644 --- a/automatic_mixed_precision.ipynb +++ b/acceleration/automatic_mixed_precision.ipynb @@ -16,7 +16,7 @@ "\n", "The Spleen dataset can be downloaded from http://medicaldecathlon.com/.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/automatic_mixed_precision.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/automatic_mixed_precision.ipynb)" ] }, { diff --git a/dataset_type_performance.ipynb b/acceleration/dataset_type_performance.ipynb similarity index 99% rename from dataset_type_performance.ipynb rename to acceleration/dataset_type_performance.ipynb index 2b658e08e9..18782c6393 100644 --- a/dataset_type_performance.ipynb +++ b/acceleration/dataset_type_performance.ipynb @@ -17,7 +17,7 @@ "\n", "It's modified from the Spleen 3D segmentation tutorial notebook.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/persistent_dataset_speed.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/persistent_dataset_speed.ipynb)" ] }, { @@ -694,7 +694,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/acceleration/distributed_training/brats_training_ddp.py b/acceleration/distributed_training/brats_training_ddp.py new file mode 100644 index 0000000000..1dcbb084b8 --- /dev/null +++ b/acceleration/distributed_training/brats_training_ddp.py @@ -0,0 +1,475 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed training based on PyTorch native `DistributedDataParallel` module. +It can run on several nodes with multiple GPU devices on every node. + +This example is a real-world task based on Decathlon challenge Task01: Brain Tumor segmentation. +So it's more complicated than other distributed training demo examples. + +Main steps to set up the distributed training: + +- Execute `torch.distributed.launch` to create processes on every node for every GPU. + It receives parameters as below: + `--nproc_per_node=NUM_GPUS_PER_NODE` + `--nnodes=NUM_NODES` + `--node_rank=INDEX_CURRENT_NODE` + `--master_addr="192.168.1.1"` + `--master_port=1234` + For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py. 
+ Alternatively, we can also use `torch.multiprocessing.spawn` to start program, but it that case, need to handle + all the above parameters and compute `rank` manually, then set to `init_process_group`, etc. + `torch.distributed.launch` is even more efficient than `torch.multiprocessing.spawn` during training. +- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank. + Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if use `torch.distributed.launch`. +- Wrap the model with `DistributedDataParallel` after moving to expected device. +- Partition dataset before training, so every rank process will only handle its own data partition. + +Note: + `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total. + Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly. + Example script to execute this program on every node: + python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE + --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE + --master_addr="192.168.1.1" --master_port=1234 + brats_training_ddp.py -d DIR_OF_TESTDATA + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3]. + +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +Some codes are taken from https://github.com/pytorch/examples/blob/master/imagenet/main.py + +""" + +import argparse +import os +import sys +import time +import warnings + +import numpy as np +import torch +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data.distributed import DistributedSampler +from torch.utils.tensorboard import SummaryWriter + +from monai.apps import DecathlonDataset +from monai.data import DataLoader +from monai.losses import DiceLoss +from monai.metrics import DiceMetric +from monai.networks.nets import SegResNet, UNet +from monai.transforms import ( + AsChannelFirstd, + CenterSpatialCropd, + Compose, + LoadNiftid, + MapTransform, + NormalizeIntensityd, + Orientationd, + RandFlipd, + RandScaleIntensityd, + RandShiftIntensityd, + RandSpatialCropd, + Spacingd, + ToTensord, +) +from monai.utils import set_determinism + + +class ConvertToMultiChannelBasedOnBratsClassesd(MapTransform): + """ + Convert labels to multi channels based on brats classes: + label 1 is the peritumoral edema + label 2 is the GD-enhancing tumor + label 3 is the necrotic and non-enhancing tumor core + The possible classes are TC (Tumor core), WC (Whole tumor) + and ET (Enhancing tumor). + + """ + + def __call__(self, data): + d = dict(data) + for key in self.keys: + result = list() + # merge label 2 and label 3 to construct TC + result.append(np.logical_or(d[key] == 2, d[key] == 3)) + # merge labels 1, 2 and 3 to construct WC + result.append(np.logical_or(np.logical_or(d[key] == 2, d[key] == 3), d[key] == 1)) + # label 2 is ET + result.append(d[key] == 2) + d[key] = np.stack(result, axis=0).astype(np.float32) + return d + + +def partition_dataset(data, shuffle: bool = False, seed: int = 0): + """ + Partition the dataset for distributed training, every rank process only train with its own data partition. + It can be useful for `CacheDataset` or `SmartCacheDataset`, because every rank process can only compute and + cache its own data. 
+ Note that every rank process will shuffle data only in its own partition if set `shuffle=True` to DataLoader. + The alternative solution is to use `DistributedSampler`, which supports global shuffle before every epoch. + But if using `CacheDataset` or `SmartCacheDataset`, every rank process will cache duplicated data content and + raise system memory usage. + Args: + data: data list to partition, assumed to be of constant size. + shuffle: if true, will shuffle the indices of data list before partition. + seed: random seed to shuffle the indices if `shuffle=True`. + this number should be identical across all processes in the distributed group. + """ + sampler: DistributedSampler = DistributedSampler(dataset=data, shuffle=shuffle) # type: ignore + sampler.set_epoch(seed) + return [data[i] for i in sampler] + + +class BratsCacheDataset(DecathlonDataset): + def __init__( + self, + root_dir, + section, + transform=LoadNiftid(["image", "label"]), + cache_rate=1.0, + num_workers=0, + shuffle=False, + ) -> None: + + if not os.path.isdir(root_dir): + raise ValueError("Root directory root_dir must be a directory.") + self.section = section + self.shuffle = shuffle + self.val_frac = 0.2 + self.set_random_state(seed=0) + dataset_dir = os.path.join(root_dir, "Task01_BrainTumour") + if not os.path.exists(dataset_dir): + raise RuntimeError( + f"Cannot find dataset directory: {dataset_dir}, please download it from Decathlon challenge." + ) + data = self._generate_data_list(dataset_dir) + super(DecathlonDataset, self).__init__(data, transform, cache_rate=cache_rate, num_workers=num_workers) + + def _generate_data_list(self, dataset_dir): + data = super()._generate_data_list(dataset_dir) + return partition_dataset(data, shuffle=self.shuffle, seed=0) + + +def main_worker(args): + # disable logging for processes except 0 on every node + if args.local_rank != 0: + f = open(os.devnull, "w") + sys.stdout = sys.stderr = f + if not os.path.exists(args.dir): + raise FileNotFoundError(f"Missing directory {args.dir}") + + # initialize the distributed training process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + total_start = time.time() + train_transforms = Compose( + [ + # load 4 Nifti images and stack them together + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys="image"), + ConvertToMultiChannelBasedOnBratsClassesd(keys="label"), + Spacingd(keys=["image", "label"], pixdim=(1.5, 1.5, 2.0), mode=("bilinear", "nearest")), + Orientationd(keys=["image", "label"], axcodes="RAS"), + RandSpatialCropd(keys=["image", "label"], roi_size=[128, 128, 64], random_size=False), + NormalizeIntensityd(keys="image", nonzero=True, channel_wise=True), + RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=0), + RandScaleIntensityd(keys="image", factors=0.1, prob=0.5), + RandShiftIntensityd(keys="image", offsets=0.1, prob=0.5), + ToTensord(keys=["image", "label"]), + ] + ) + + # create a training data loader + train_ds = BratsCacheDataset( + root_dir=args.dir, + transform=train_transforms, + section="training", + num_workers=4, + cache_rate=args.cache_rate, + shuffle=True, + ) + train_loader = DataLoader( + train_ds, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True + ) + + # validation transforms and dataset + val_transforms = Compose( + [ + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys="image"), + ConvertToMultiChannelBasedOnBratsClassesd(keys="label"), + Spacingd(keys=["image", "label"], pixdim=(1.5, 1.5, 2.0), 
mode=("bilinear", "nearest")), + Orientationd(keys=["image", "label"], axcodes="RAS"), + CenterSpatialCropd(keys=["image", "label"], roi_size=[128, 128, 64]), + NormalizeIntensityd(keys="image", nonzero=True, channel_wise=True), + ToTensord(keys=["image", "label"]), + ] + ) + val_ds = BratsCacheDataset( + root_dir=args.dir, + transform=val_transforms, + section="validation", + num_workers=4, + cache_rate=args.cache_rate, + shuffle=False, + ) + val_loader = DataLoader( + val_ds, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True + ) + + if dist.get_rank() == 0: + # Logging for TensorBoard + writer = SummaryWriter(log_dir=args.log_dir) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + if args.network == "UNet": + model = UNet( + dimensions=3, + in_channels=4, + out_channels=3, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + else: + model = SegResNet(in_channels=4, out_channels=3, init_filters=16, dropout_prob=0.2).to(device) + loss_function = DiceLoss(to_onehot_y=False, sigmoid=True, squared_pred=True) + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5, amsgrad=True) + # wrap the model with DistributedDataParallel module + model = DistributedDataParallel(model, device_ids=[args.local_rank]) + + # start a typical PyTorch training + total_epoch = args.epochs + best_metric = -1000000 + best_metric_epoch = -1 + epoch_time = AverageMeter("Time", ":6.3f") + progress = ProgressMeter(total_epoch, [epoch_time], prefix="Epoch: ") + end = time.time() + print(f"Time elapsed before training: {end-total_start}") + for epoch in range(total_epoch): + + train_loss = train(train_loader, model, loss_function, optimizer, epoch, args, device) + epoch_time.update(time.time() - end) + + if epoch % args.print_freq == 0: + progress.display(epoch) + + if dist.get_rank() == 0: + writer.add_scalar("Loss/train", train_loss, epoch) + + if (epoch + 1) % args.val_interval == 0: + metric, metric_tc, metric_wt, metric_et = evaluate(model, val_loader, device) + + if dist.get_rank() == 0: + writer.add_scalar("Mean Dice/val", metric, epoch) + writer.add_scalar("Mean Dice TC/val", metric_tc, epoch) + writer.add_scalar("Mean Dice WT/val", metric_wt, epoch) + writer.add_scalar("Mean Dice ET/val", metric_et, epoch) + if metric > best_metric: + best_metric = metric + best_metric_epoch = epoch + 1 + print( + f"current epoch: {epoch + 1} current mean dice: {metric:.4f}" + f" tc: {metric_tc:.4f} wt: {metric_wt:.4f} et: {metric_et:.4f}" + f"\nbest mean dice: {best_metric:.4f} at epoch: {best_metric_epoch}" + ) + end = time.time() + print(f"Time elapsed after epoch {epoch + 1} is {end - total_start}") + + if dist.get_rank() == 0: + print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}") + # all processes should see same parameters as they all start from same + # random parameters and gradients are synchronized in backward passes, + # therefore, saving it in one process is sufficient + torch.save(model.state_dict(), "final_model.pth") + writer.flush() + dist.destroy_process_group() + + +def train(train_loader, model, criterion, optimizer, epoch, args, device): + batch_time = AverageMeter("Time", ":6.3f") + data_time = AverageMeter("Data", ":6.3f") + losses = AverageMeter("Loss", ":.4e") + progress = ProgressMeter(len(train_loader), [batch_time, data_time, losses], prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end 
= time.time() + for i, batch_data in enumerate(train_loader): + image = batch_data["image"].to(device, non_blocking=True) + target = batch_data["label"].to(device, non_blocking=True) + + # measure data loading time + data_time.update(time.time() - end) + + # compute output + optimizer.zero_grad() + output = model(image) + loss = criterion(output, target) + + # record loss + losses.update(loss.item(), image.size(0)) + + # compute gradient and do GD step + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % 10 == 0: + progress.display(i) + return losses.avg + + +def evaluate(model, data_loader, device): + metric = torch.zeros(8, dtype=torch.float, device=device) + + model.eval() + with torch.no_grad(): + dice_metric = DiceMetric(include_background=True, sigmoid=True, reduction="mean") + for val_data in data_loader: + val_inputs, val_labels = ( + val_data["image"].to(device, non_blocking=True), + val_data["label"].to(device, non_blocking=True), + ) + val_outputs = model(val_inputs) + # compute overall mean dice + value = dice_metric(y_pred=val_outputs, y=val_labels).squeeze() + metric[0] += value * dice_metric.not_nans + metric[1] += dice_metric.not_nans + # compute mean dice for TC + value_tc = dice_metric(y_pred=val_outputs[:, 0:1], y=val_labels[:, 0:1]).squeeze() + metric[2] += value_tc * dice_metric.not_nans + metric[3] += dice_metric.not_nans + # compute mean dice for WT + value_wt = dice_metric(y_pred=val_outputs[:, 1:2], y=val_labels[:, 1:2]).squeeze() + metric[4] += value_wt * dice_metric.not_nans + metric[5] += dice_metric.not_nans + # compute mean dice for ET + value_et = dice_metric(y_pred=val_outputs[:, 2:3], y=val_labels[:, 2:3]).squeeze() + metric[6] += value_et * dice_metric.not_nans + metric[7] += dice_metric.not_nans + + # synchronizes all processes and reduce results + dist.barrier() + dist.all_reduce(metric, op=torch.distributed.ReduceOp.SUM) + metric = metric.tolist() + + return metric[0] / metric[1], metric[2] / metric[3], metric[4] / metric[5], metric[6] / metric[7] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory of Brain Tumor dataset.") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int, help="node rank for distributed training") + parser.add_argument( + "-j", "--workers", default=1, type=int, metavar="N", help="number of data loading workers (default: 1)" + ) + parser.add_argument("--epochs", default=90, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument("--lr", default=1e-4, type=float, help="learning rate") + parser.add_argument( + "-b", + "--batch_size", + default=4, + type=int, + metavar="N", + help="mini-batch size (default: 256), this is the total " + "batch size of all GPUs on the current node when " + "using Data Parallel or Distributed Data Parallel", + ) + parser.add_argument("-p", "--print_freq", default=10, type=int, metavar="N", help="print frequency (default: 10)") + parser.add_argument( + "-e", "--evaluate", dest="evaluate", action="store_true", help="evaluate model on validation set" + ) + parser.add_argument("--seed", default=None, type=int, help="seed for initializing training.") + parser.add_argument("--cache_rate", type=float, default=1.0) + parser.add_argument("--val_interval", type=int, default=5) + parser.add_argument("--network", type=str, 
default="UNet", choices=["UNet", "SegResNet"]) + parser.add_argument("--log_dir", type=str, default=None) + args = parser.parse_args() + + if args.seed is not None: + set_determinism(seed=args.seed) + warnings.warn( + "You have chosen to seed training. " + "This will turn on the CUDNN deterministic setting, " + "which can slow down your training considerably! " + "You may see unexpected behavior when restarting " + "from checkpoints." + ) + + main_worker(args=args) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=":f"): + self.name = name + self.fmt = fmt + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print("\t".join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = "{:" + str(num_digits) + "d}" + return "[" + fmt + "/" + fmt.format(num_batches) + "]" + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="10.110.44.150" --master_port=1234 +# brats_training_ddp.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_evaluation_ddp.py b/acceleration/distributed_training/unet_evaluation_ddp.py new file mode 100644 index 0000000000..9cc1851a0c --- /dev/null +++ b/acceleration/distributed_training/unet_evaluation_ddp.py @@ -0,0 +1,166 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed evaluation based on PyTorch native `DistributedDataParallel` module. +It can run on several nodes with multiple GPU devices on every node. +Main steps to set up the distributed evaluation: + +- Execute `torch.distributed.launch` to create processes on every node for every GPU. + It receives parameters as below: + `--nproc_per_node=NUM_GPUS_PER_NODE` + `--nnodes=NUM_NODES` + `--node_rank=INDEX_CURRENT_NODE` + `--master_addr="192.168.1.1"` + `--master_port=1234` + For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py. 
+ Alternatively, we can also use `torch.multiprocessing.spawn` to start program, but it that case, need to handle + all the above parameters and compute `rank` manually, then set to `init_process_group`, etc. + `torch.distributed.launch` is even more efficient than `torch.multiprocessing.spawn`. +- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank. + Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if use `torch.distributed.launch`. +- Wrap the model with `DistributedDataParallel` after moving to expected device. +- Put model file on every node, then load and map to expected GPU device in every process. +- Wrap Dataset with `DistributedSampler`, disable the `shuffle` in sampler and DataLoader. +- Compute `Dice Metric` on every process, reduce the results after synchronization. + +Note: + `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total. + Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly. + Example script to execute this program on every node: + python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE + --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE + --master_addr="192.168.1.1" --master_port=1234 + unet_evaluation_ddp.py -d DIR_OF_TESTDATA + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3]. + +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +""" + +import argparse +import os +from glob import glob + +import nibabel as nib +import numpy as np +import torch +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import AsChannelFirstd, Compose, LoadNiftid, ScaleIntensityd, ToTensord + + +def evaluate(args): + if args.local_rank == 0 and not os.path.exists(args.dir): + # create 16 random image, mask paris for evaluation + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(16): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + # initialize the distributed evaluation process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + val_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + + # create a evaluation data loader + val_ds = Dataset(data=val_files, transform=val_transforms) + # create a evaluation data sampler + val_sampler = DistributedSampler(val_ds, shuffle=False) + # sliding 
window inference need to input 1 image in every iteration + val_loader = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=2, pin_memory=True, sampler=val_sampler) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + # wrap the model with DistributedDataParallel module + model = DistributedDataParallel(model, device_ids=[args.local_rank]) + # config mapping to expected GPU device + map_location = {"cuda:0": f"cuda:{args.local_rank}"} + # load model parameters to GPU device + model.load_state_dict(torch.load("final_model.pth", map_location=map_location)) + + model.eval() + with torch.no_grad(): + # define PyTorch Tensor to record metrics result at each GPU + # the first value is `sum` of all dice metric, the second value is `count` of not_nan items + metric = torch.zeros(2, dtype=torch.float, device=device) + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device) + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels).squeeze() + metric[0] += value * dice_metric.not_nans + metric[1] += dice_metric.not_nans + # synchronizes all processes and reduce results + dist.barrier() + dist.all_reduce(metric, op=torch.distributed.ReduceOp.SUM) + metric = metric.tolist() + if dist.get_rank() == 0: + print("evaluation metric:", metric[0] / metric[1]) + dist.destroy_process_group() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int) + args = parser.parse_args() + + evaluate(args=args) + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="192.168.1.1" --master_port=1234 +# unet_evaluation_ddp.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_evaluation_horovod.py b/acceleration/distributed_training/unet_evaluation_horovod.py new file mode 100644 index 0000000000..463e5bcc6a --- /dev/null +++ b/acceleration/distributed_training/unet_evaluation_horovod.py @@ -0,0 +1,165 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +This example shows how to execute distributed evaluation based on Horovod APIs. +It can run on several nodes with multiple GPU devices on every node. +Main steps to set up the distributed evaluation: + +- Install Horovod referring to the guide: https://github.com/horovod/horovod/blob/master/docs/gpus.rst + If using MONAI docker, which already has NCCL and MPI, can quickly install Horovod with command: + `HOROVOD_NCCL_INCLUDE=/usr/include HOROVOD_NCCL_LIB=/usr/lib/x86_64-linux-gnu HOROVOD_GPU_OPERATIONS=NCCL \ + pip install --no-cache-dir horovod` +- Set SSH permissions for root login without password at all nodes except master, referring to: + http://www.linuxproblem.org/art_9.html +- Run `hvd.init()` to initialize Horovod. +- Pin each GPU to a single process to avoid resource contention, use `hvd.local_rank()` to get GPU index. + And use `hvd.rank()` to get the overall rank index. +- Wrap Dataset with `DistributedSampler`, disable `shuffle` for sampler and DataLoader. +- Broadcast the model parameters from rank 0 to all other processes. + +Note: + Suggest setting exactly the same software environment for every node, especially `mpi`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly, if using docker, need + to set SSH permissions both at the node and in docker, referring to Horovod guide for more details: + https://github.com/horovod/horovod/blob/master/docs/docker.rst + + Example script to execute this program, only need to run on the master node: + `horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_evaluation_horovod.py -d "./testdata"` + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.19.5]. + +Referring to: https://github.com/horovod/horovod/blob/master/examples/pytorch_mnist.py + +""" + +import argparse +import os +from glob import glob + +import horovod.torch as hvd +import nibabel as nib +import numpy as np +import torch +import torch.multiprocessing as mp +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.inferers import sliding_window_inference +from monai.metrics import DiceMetric +from monai.transforms import AsChannelFirstd, Compose, LoadNiftid, ScaleIntensityd, ToTensord + + +def evaluate(args): + # initialize Horovod library + hvd.init() + # Horovod limits CPU threads to be used per worker + torch.set_num_threads(1) + + if hvd.local_rank() == 0 and not os.path.exists(args.dir): + # create 16 random image, mask paris for evaluation + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(16): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + val_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + ToTensord(keys=["img", "seg"]), + ] + ) + + # create a evaluation 
data loader + val_ds = Dataset(data=val_files, transform=val_transforms) + # create a evaluation data sampler + val_sampler = DistributedSampler(val_ds, shuffle=False, num_replicas=hvd.size(), rank=hvd.rank()) + # when supported, use "forkserver" to spawn dataloader workers instead of "fork" to prevent + # issues with Infiniband implementations that are not fork-safe + multiprocessing_context = None + if hasattr(mp, "_supports_context") and mp._supports_context and "forkserver" in mp.get_all_start_methods(): + multiprocessing_context = "forkserver" + # sliding window inference need to input 1 image in every iteration + val_loader = DataLoader( + val_ds, + batch_size=1, + shuffle=False, + num_workers=2, + pin_memory=True, + sampler=val_sampler, + multiprocessing_context=multiprocessing_context, + ) + dice_metric = DiceMetric(include_background=True, to_onehot_y=False, sigmoid=True, reduction="mean") + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{hvd.local_rank()}") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + if hvd.rank() == 0: + # load model parameters for evaluation + model.load_state_dict(torch.load("final_model.pth")) + # Horovod broadcasts parameters + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + + model.eval() + with torch.no_grad(): + # define PyTorch Tensor to record metrics result at each GPU + # the first value is `sum` of all dice metric, the second value is `count` of not_nan items + metric = torch.zeros(2, dtype=torch.float, device=device) + for val_data in val_loader: + val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device) + # define sliding window size and batch size for windows inference + roi_size = (96, 96, 96) + sw_batch_size = 4 + val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model) + value = dice_metric(y_pred=val_outputs, y=val_labels).squeeze() + metric[0] += value * dice_metric.not_nans + metric[1] += dice_metric.not_nans + # synchronizes all processes and reduce results + print(f"metric in rank {hvd.rank()}: sum={metric[0].item()}, count={metric[1].item()}") + avg_metric = hvd.allreduce(metric, name="mean_dice") + if hvd.rank() == 0: + print(f"average metric: sum={avg_metric[0].item()}, count={avg_metric[1].item()}") + print("evaluation metric:", (avg_metric[0] / avg_metric[1]).item()) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + args = parser.parse_args() + + evaluate(args=args) + + +# Example script to execute this program only on the master node: +# horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_evaluation_horovod.py -d "./testdata" +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_evaluation_workflows.py b/acceleration/distributed_training/unet_evaluation_workflows.py new file mode 100644 index 0000000000..22b41206a4 --- /dev/null +++ b/acceleration/distributed_training/unet_evaluation_workflows.py @@ -0,0 +1,203 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This example shows how to execute distributed evaluation based on PyTorch native `DistributedDataParallel` module
+and MONAI workflows. It can run on several nodes with multiple GPU devices on every node.
+Main steps to set up the distributed evaluation:
+
+- Execute `torch.distributed.launch` to create processes on every node for every GPU.
+  It receives parameters as below:
+  `--nproc_per_node=NUM_GPUS_PER_NODE`
+  `--nnodes=NUM_NODES`
+  `--node_rank=INDEX_CURRENT_NODE`
+  `--master_addr="192.168.1.1"`
+  `--master_port=1234`
+  For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py.
+  Alternatively, we can also use `torch.multiprocessing.spawn` to start the program, but in that case we need to handle
+  all the above parameters and compute `rank` manually, then pass them to `init_process_group`, etc.
+  `torch.distributed.launch` is more efficient than `torch.multiprocessing.spawn`.
+- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank.
+  Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if using `torch.distributed.launch`.
+- Wrap the model with `DistributedDataParallel` after moving to expected device.
+- Put the model file on every node, then load and map it to the expected GPU device in every process.
+- Wrap Dataset with `DistributedSampler`, disable the `shuffle` in sampler and DataLoader.
+- Add `StatsHandler` and `SegmentationSaver` to the master process which is `dist.get_rank() == 0`.
+- ignite can automatically reduce metrics for distributed evaluation, refer to:
+  https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85
+
+Note:
+    `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total.
+    Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc.
+    A good practice is to use the same MONAI docker image for all nodes directly.
+    Example script to execute this program on every node:
+    python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE
+           --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE
+           --master_addr="192.168.1.1" --master_port=1234
+           unet_evaluation_workflows.py -d DIR_OF_TESTDATA
+
+    This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3].
+ +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +""" + +import argparse +import logging +import os +import sys +from glob import glob + +import nibabel as nib +import numpy as np +import torch +import torch.distributed as dist +from ignite.metrics import Accuracy +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.engines import SupervisedEvaluator +from monai.handlers import CheckpointLoader, MeanDice, SegmentationSaver, StatsHandler +from monai.inferers import SlidingWindowInferer +from monai.transforms import ( + Activationsd, + AsChannelFirstd, + AsDiscreted, + Compose, + KeepLargestConnectedComponentd, + LoadNiftid, + ScaleIntensityd, + ToTensord, +) + + +def evaluate(args): + if args.local_rank == 0 and not os.path.exists(args.dir): + # create 16 random image, mask paris for evaluation + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(16): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + # initialize the distributed evaluation process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + val_files = [{"image": img, "label": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + val_transforms = Compose( + [ + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys=["image", "label"], channel_dim=-1), + ScaleIntensityd(keys="image"), + ToTensord(keys=["image", "label"]), + ] + ) + + # create a evaluation data loader + val_ds = Dataset(data=val_files, transform=val_transforms) + # create a evaluation data sampler + val_sampler = DistributedSampler(val_ds, shuffle=False) + # sliding window inference need to input 1 image in every iteration + val_loader = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=2, pin_memory=True, sampler=val_sampler) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + net = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + # wrap the model with DistributedDataParallel module + net = DistributedDataParallel(net, device_ids=[args.local_rank]) + + val_post_transforms = Compose( + [ + Activationsd(keys="pred", sigmoid=True), + AsDiscreted(keys="pred", threshold_values=True), + KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]), + ] + ) + val_handlers = [ + CheckpointLoader( + load_path="./runs/checkpoint_epoch=4.pth", + load_dict={"net": net}, + # config mapping to expected GPU device + map_location={"cuda:0": f"cuda:{args.local_rank}"}, + ), + ] + if dist.get_rank() == 0: + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + val_handlers.extend( + [ + StatsHandler(output_transform=lambda x: None), + SegmentationSaver( + output_dir="./runs/", + batch_transform=lambda batch: 
batch["image_meta_dict"], + output_transform=lambda output: output["pred"], + ), + ] + ) + + evaluator = SupervisedEvaluator( + device=device, + val_data_loader=val_loader, + network=net, + inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5), + post_transform=val_post_transforms, + key_val_metric={ + "val_mean_dice": MeanDice( + include_background=True, + output_transform=lambda x: (x["pred"], x["label"]), + device=device, + ) + }, + additional_metrics={"val_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]), device=device)}, + val_handlers=val_handlers, + # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation + amp=True if monai.config.get_torch_version_tuple() >= (1, 6) else False, + ) + evaluator.run() + dist.destroy_process_group() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int) + args = parser.parse_args() + + evaluate(args=args) + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="192.168.1.1" --master_port=1234 +# unet_evaluation_workflows.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_training_ddp.py b/acceleration/distributed_training/unet_training_ddp.py new file mode 100644 index 0000000000..1a2db6fc8a --- /dev/null +++ b/acceleration/distributed_training/unet_training_ddp.py @@ -0,0 +1,193 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed training based on PyTorch native `DistributedDataParallel` module. +It can run on several nodes with multiple GPU devices on every node. +Main steps to set up the distributed training: + +- Execute `torch.distributed.launch` to create processes on every node for every GPU. + It receives parameters as below: + `--nproc_per_node=NUM_GPUS_PER_NODE` + `--nnodes=NUM_NODES` + `--node_rank=INDEX_CURRENT_NODE` + `--master_addr="192.168.1.1"` + `--master_port=1234` + For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py. + Alternatively, we can also use `torch.multiprocessing.spawn` to start program, but it that case, need to handle + all the above parameters and compute `rank` manually, then set to `init_process_group`, etc. + `torch.distributed.launch` is even more efficient than `torch.multiprocessing.spawn` during training. +- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank. 
+ Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if use `torch.distributed.launch`. +- Wrap the model with `DistributedDataParallel` after moving to expected device. +- Wrap Dataset with `DistributedSampler`, and disable the `shuffle` in DataLoader. + Instead, shuffle data by `train_sampler.set_epoch(epoch)` before every epoch. + +Note: + `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total. + Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly. + Example script to execute this program on every node: + python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE + --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE + --master_addr="192.168.1.1" --master_port=1234 + unet_training_ddp.py -d DIR_OF_TESTDATA + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3]. + +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +""" + +import argparse +import os +import sys +from glob import glob + +import nibabel as nib +import numpy as np +import torch +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.transforms import ( + AsChannelFirstd, + Compose, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def train(args): + # disable logging for processes except 0 on every node + if args.local_rank != 0: + f = open(os.devnull, "w") + sys.stdout = sys.stderr = f + elif not os.path.exists(args.dir): + # create 40 random image, mask paris for training + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + # initialize the distributed training process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["img", "seg"]), + ] + ) + + # create a training data loader + train_ds = Dataset(data=train_files, transform=train_transforms) + # create a training data sampler + train_sampler = DistributedSampler(train_ds) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=False, + num_workers=2, + pin_memory=True, + sampler=train_sampler, + 
) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True).to(device) + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + # wrap the model with DistributedDataParallel module + model = DistributedDataParallel(model, device_ids=[args.local_rank]) + + # start a typical PyTorch training + epoch_loss_values = list() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + train_sampler.set_epoch(epoch) + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + print(f"train completed, epoch losses: {epoch_loss_values}") + if dist.get_rank() == 0: + # all processes should see same parameters as they all start from same + # random parameters and gradients are synchronized in backward passes, + # therefore, saving it in one process is sufficient + torch.save(model.state_dict(), "final_model.pth") + dist.destroy_process_group() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int) + args = parser.parse_args() + + train(args=args) + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="192.168.1.1" --master_port=1234 +# unet_training_ddp.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_training_horovod.py b/acceleration/distributed_training/unet_training_horovod.py new file mode 100644 index 0000000000..4462fb6507 --- /dev/null +++ b/acceleration/distributed_training/unet_training_horovod.py @@ -0,0 +1,193 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed training based on Horovod APIs. +It can run on several nodes with multiple GPU devices on every node. 
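One practical detail about the `torch.save(model.state_dict(), "final_model.pth")` call in `unet_training_ddp.py` above: because the model is wrapped in `DistributedDataParallel`, the saved keys carry a `module.` prefix. A hedged sketch of loading that checkpoint back into a plain, unwrapped UNet (built with the same constructor arguments the script uses) looks like this:

```python
import torch
import monai

plain_model = monai.networks.nets.UNet(
    dimensions=3, in_channels=1, out_channels=1,
    channels=(16, 32, 64, 128, 256), strides=(2, 2, 2, 2), num_res_units=2,
)
state = torch.load("final_model.pth", map_location="cpu")
# strip the "module." prefix added by the DistributedDataParallel wrapper
state = {k[len("module."):] if k.startswith("module.") else k: v for k, v in state.items()}
plain_model.load_state_dict(state)
```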
+Main steps to set up the distributed training: + +- Install Horovod referring to the guide: https://github.com/horovod/horovod/blob/master/docs/gpus.rst + If using MONAI docker, which already has NCCL and MPI, can quickly install Horovod with command: + `HOROVOD_NCCL_INCLUDE=/usr/include HOROVOD_NCCL_LIB=/usr/lib/x86_64-linux-gnu HOROVOD_GPU_OPERATIONS=NCCL \ + pip install --no-cache-dir horovod` +- Set SSH permissions for root login without password at all nodes except master, referring to: + http://www.linuxproblem.org/art_9.html +- Run `hvd.init()` to initialize Horovod. +- Pin each GPU to a single process to avoid resource contention, use `hvd.local_rank()` to get GPU index. + And use `hvd.rank()` to get the overall rank index. +- Wrap Dataset with `DistributedSampler`, and disable the `shuffle` in DataLoader. + Instead, shuffle data by `train_sampler.set_epoch(epoch)` before every epoch. +- Wrap the optimizer in hvd.DistributedOptimizer. The distributed optimizer delegates gradient + computation to the original optimizer, averages gradients using allreduce or allgather, + and then applies those averaged gradients. +- Broadcast the initial variable states from rank 0 to all other processes. + +Note: + Suggest setting exactly the same software environment for every node, especially `mpi`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly, if using docker, need + to set SSH permissions both at the node and in docker, referring to Horovod guide for more details: + https://github.com/horovod/horovod/blob/master/docs/docker.rst + + Example script to execute this program, only need to run on the master node: + `horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_training_horovod.py -d "./testdata"` + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3], [horovod 0.19.5]. 
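To make the bullet list above concrete, here is a condensed sketch of the Horovod calls the script below strings together. The `torch.nn.Linear` model is only a placeholder (the real script uses a MONAI UNet), and the snippet is meant to be run under `horovodrun`:

```python
import horovod.torch as hvd
import torch

hvd.init()                               # one process per GPU, started by horovodrun
torch.cuda.set_device(hvd.local_rank())  # pin this process to its local GPU

model = torch.nn.Linear(10, 1).cuda()    # placeholder network
optimizer = torch.optim.Adam(model.parameters(), 1e-3)

# every worker starts from rank 0's weights; gradients are then averaged by allreduce
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)
optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
```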
+ +Referring to: https://github.com/horovod/horovod/blob/master/examples/pytorch_mnist.py + +""" + +import argparse +import os +import sys +from glob import glob + +import horovod.torch as hvd +import nibabel as nib +import numpy as np +import torch +import torch.multiprocessing as mp +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.transforms import ( + AsChannelFirstd, + Compose, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def train(args): + # initialize Horovod library + hvd.init() + # Horovod limits CPU threads to be used per worker + torch.set_num_threads(1) + # disable logging for processes except 0 on every node + if hvd.local_rank() != 0: + f = open(os.devnull, "w") + sys.stdout = sys.stderr = f + elif not os.path.exists(args.dir): + # create 40 random image, mask paris on master node for training + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["img", "seg"]), + ] + ) + + # create a training data loader + train_ds = Dataset(data=train_files, transform=train_transforms) + # create a training data sampler + train_sampler = DistributedSampler(train_ds, num_replicas=hvd.size(), rank=hvd.rank()) + # when supported, use "forkserver" to spawn dataloader workers instead of "fork" to prevent + # issues with Infiniband implementations that are not fork-safe + multiprocessing_context = None + if hasattr(mp, "_supports_context") and mp._supports_context and "forkserver" in mp.get_all_start_methods(): + multiprocessing_context = "forkserver" + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=False, + num_workers=2, + pin_memory=True, + sampler=train_sampler, + multiprocessing_context=multiprocessing_context, + ) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{hvd.local_rank()}") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True).to(device) + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + # Horovod broadcasts parameters & optimizer state + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer, root_rank=0) + 
# Horovod wraps optimizer with DistributedOptimizer + optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters()) + + # start a typical PyTorch training + epoch_loss_values = list() + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + train_sampler.set_epoch(epoch) + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = len(train_ds) // train_loader.batch_size + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + print(f"train completed, epoch losses: {epoch_loss_values}") + if hvd.rank() == 0: + # all processes should see same parameters as they all start from same + # random parameters and gradients are synchronized in backward passes, + # therefore, saving it in one process is sufficient + torch.save(model.state_dict(), "final_model.pth") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + args = parser.parse_args() + + train(args=args) + + +# Example script to execute this program only on the master node: +# horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python unet_training_horovod.py -d "./testdata" +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_training_smartcache.py b/acceleration/distributed_training/unet_training_smartcache.py new file mode 100644 index 0000000000..1a2fad1866 --- /dev/null +++ b/acceleration/distributed_training/unet_training_smartcache.py @@ -0,0 +1,259 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed training based on PyTorch native module and SmartCacheDataset. +It can run on several nodes with multiple GPU devices on every node. +It splits data into partitions, every rank only cache and train with its own partition. + +Main steps to set up the distributed training: + +- Execute `torch.distributed.launch` to create processes on every node for every GPU. + It receives parameters as below: + `--nproc_per_node=NUM_GPUS_PER_NODE` + `--nnodes=NUM_NODES` + `--node_rank=INDEX_CURRENT_NODE` + `--master_addr="192.168.1.1"` + `--master_port=1234` + For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py. + Alternatively, we can also use `torch.multiprocessing.spawn` to start program, but it that case, need to handle + all the above parameters and compute `rank` manually, then set to `init_process_group`, etc. + `torch.distributed.launch` is even more efficient than `torch.multiprocessing.spawn` during training. 
+- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank. + Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if use `torch.distributed.launch`. +- Wrap the model with `DistributedDataParallel` after moving to expected device. +- Execute `partition_dataset` to load data only for current rank, no need `DistributedSampler` anymore. +- `SmartCacheDataset` computes and caches the data for the first epoch. +- Call `start()` function of `SmartCacheDataset` to start the replacement thread. +- Call `update_cache()` function of `SmartCacheDataset` before every epoch to replace part of cache content. +- Call `shutdown()` function of `SmartCacheDataset` to stop replacement thread when training ends. + +Note: + `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total. + Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly. + Example script to execute this program on every node: + python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE + --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE + --master_addr="192.168.1.1" --master_port=1234 + unet_training_smartcache.py -d DIR_OF_TESTDATA + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3]. + +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +""" + +import argparse +import math +import os +import sys +from glob import glob + +import nibabel as nib +import numpy as np +import torch +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel + +import monai +from monai.data import DataLoader, SmartCacheDataset, create_test_image_3d +from monai.transforms import ( + AsChannelFirstd, + Compose, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def partition_dataset(data, num_replicas=None, rank=None, shuffle=False, seed=0, drop_last=False): + """ + Partition the dataset for distributed training, every rank process only train with its own data partition. + It can be useful for `CacheDataset` or `SmartCacheDataset`, because every rank process can only compute and + cache its own data. + Note that every rank process will shuffle data only in its own partition if set `shuffle=True` to DataLoader. + + The alternative solution is to use `DistributedSampler`, which supports global shuffle before every epoch. + But if using `CacheDataset` or `SmartCacheDataset`, every rank process will cache duplicated data content and + raise system memory usage. + + Args: + data: data list to partition, assumed to be of constant size. + num_replicas: number of processes participating in the distributed training. + if None, retrieve the `world_size` from current distributed group. + rank: rank of the current process within `num_replicas`. + if None, retrieve the rank index from current distributed group. + shuffle: if true, will shuffle the indices of data list before partition. + seed: random seed to shuffle the indices if `shuffle=True`, default is `0`. + this number should be identical across all processes in the distributed group. + drop_last: if `True`, will drop the tail of the data to make it evenly divisible across the number of replicas. + if `False`, add extra indices to make the data evenly divisible across the replicas. default is `False`. 
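A small worked example of the partitioning behaviour documented above, using an evenly divisible list so no padding or dropping is involved. Passing `num_replicas` and `rank` explicitly means no process group needs to be initialized:

```python
data = ["img0.nii.gz", "img1.nii.gz", "img2.nii.gz", "img3.nii.gz"]
# rank 0 of 2 receives indices 0 and 2, rank 1 receives indices 1 and 3
print(partition_dataset(data, num_replicas=2, rank=0))  # ['img0.nii.gz', 'img2.nii.gz']
print(partition_dataset(data, num_replicas=2, rank=1))  # ['img1.nii.gz', 'img3.nii.gz']
```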
+ + """ + if num_replicas is None or rank is None: + if not dist.is_available(): + raise RuntimeError("require distributed package to be available.") + if num_replicas is None: + num_replicas = dist.get_world_size() + if rank is None: + rank = dist.get_rank() + + if drop_last and len(data) % num_replicas != 0: + # split to nearest available length that is evenly divisible + num_samples = math.ceil((len(data) - num_replicas) / num_replicas) + else: + num_samples = math.ceil(len(data) / num_replicas) + total_size = num_samples * num_replicas + + indices = np.array(list(range(len(data)))) + if shuffle: + # deterministically shuffle based on fixed seed for every process + np.random.seed(seed) + np.random.shuffle(indices) + + if not drop_last and total_size - len(indices) > 0: + # add extra samples to make it evenly divisible + indices += indices[: (total_size - len(indices))] + else: + # remove tail of data to make it evenly divisible + indices = indices[:total_size] + + indices = indices[rank:total_size:num_replicas] + return [data[i] for i in indices] + + +def train(args): + # disable logging for processes except 0 on every node + if args.local_rank != 0: + f = open(os.devnull, "w") + sys.stdout = sys.stderr = f + elif not os.path.exists(args.dir): + # create 40 random image, mask paris for training + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + # initialize the distributed training process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + train_files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["img", "seg"]), + AsChannelFirstd(keys=["img", "seg"], channel_dim=-1), + ScaleIntensityd(keys="img"), + RandCropByPosNegLabeld( + keys=["img", "seg"], label_key="seg", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["img", "seg"]), + ] + ) + + # partition dataset based on current rank number, every rank trains with its own data + data_part = partition_dataset(train_files, shuffle=True) + train_ds = SmartCacheDataset( + data=data_part, + transform=train_transforms, + replace_rate=0.2, + cache_num=15, # we suppose to use 2 ranks in this example, every rank has 20 training images + num_init_workers=2, + num_replace_workers=2, + ) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=2, pin_memory=True) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + model = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss_function = monai.losses.DiceLoss(sigmoid=True).to(device) + optimizer 
= torch.optim.Adam(model.parameters(), 1e-3) + # wrap the model with DistributedDataParallel module + model = DistributedDataParallel(model, device_ids=[args.local_rank]) + + # start a typical PyTorch training + epoch_loss_values = list() + # start the replacement thread of SmartCache + train_ds.start() + + for epoch in range(5): + print("-" * 10) + print(f"epoch {epoch + 1}/{5}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step += 1 + inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = loss_function(outputs, labels) + loss.backward() + optimizer.step() + epoch_loss += loss.item() + epoch_len = math.ceil(len(train_ds) / train_loader.batch_size) + print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}") + epoch_loss /= step + epoch_loss_values.append(epoch_loss) + # replace 20% of cache content for next epoch + train_ds.update_cache() + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + # stop replacement thread of SmartCache + train_ds.shutdown() + print(f"train completed, epoch losses: {epoch_loss_values}") + if dist.get_rank() == 0: + # all processes should see same parameters as they all start from same + # random parameters and gradients are synchronized in backward passes, + # therefore, saving it in one process is sufficient + torch.save(model.state_dict(), "final_model.pth") + dist.destroy_process_group() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int) + args = parser.parse_args() + + train(args=args) + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="192.168.1.1" --master_port=1234 +# unet_training_smartcache.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/acceleration/distributed_training/unet_training_workflows.py b/acceleration/distributed_training/unet_training_workflows.py new file mode 100644 index 0000000000..713a2cd5ff --- /dev/null +++ b/acceleration/distributed_training/unet_training_workflows.py @@ -0,0 +1,206 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to execute distributed training based on PyTorch native `DistributedDataParallel` module +and MONAI workflows. It can run on several nodes with multiple GPU devices on every node. +Main steps to set up the distributed training: + +- Execute `torch.distributed.launch` to create processes on every node for every GPU. 
+ It receives parameters as below: + `--nproc_per_node=NUM_GPUS_PER_NODE` + `--nnodes=NUM_NODES` + `--node_rank=INDEX_CURRENT_NODE` + `--master_addr="192.168.1.1"` + `--master_port=1234` + For more details, refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py. + Alternatively, we can also use `torch.multiprocessing.spawn` to start program, but it that case, need to handle + all the above parameters and compute `rank` manually, then set to `init_process_group`, etc. + `torch.distributed.launch` is even more efficient than `torch.multiprocessing.spawn` during training. +- Use `init_process_group` to initialize every process, every GPU runs in a separate process with unique rank. + Here we use `NVIDIA NCCL` as the backend and must set `init_method="env://"` if use `torch.distributed.launch`. +- Wrap the model with `DistributedDataParallel` after moving to expected device. +- Wrap Dataset with `DistributedSampler`, and disable the `shuffle` in DataLoader. + Instead, `SupervisedTrainer` shuffles data by `train_sampler.set_epoch(epoch)` before every epoch. +- Add `StatsHandler` and `CheckpointHandler` to the master process which is `dist.get_rank() == 0`. +- ignite can automatically reduce metrics for distributed training, refer to: + https://github.com/pytorch/ignite/blob/v0.3.0/ignite/metrics/metric.py#L85 + +Note: + `torch.distributed.launch` will launch `nnodes * nproc_per_node = world_size` processes in total. + Suggest setting exactly the same software environment for every node, especially `PyTorch`, `nccl`, etc. + A good practice is to use the same MONAI docker image for all nodes directly. + Example script to execute this program on every node: + python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE + --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE + --master_addr="192.168.1.1" --master_port=1234 + unet_training_workflows.py -d DIR_OF_TESTDATA + + This example was tested with [Ubuntu 16.04/20.04], [NCCL 2.6.3]. 
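To spell out the relationship between the launch arguments and process ranks mentioned in the note above: this is the same arithmetic `torch.distributed.launch` performs when exporting `RANK`, and what you would have to reproduce yourself if launching with `torch.multiprocessing.spawn` instead (numbers below are illustrative only):

```python
nnodes, nproc_per_node = 2, 4                  # two nodes, four GPUs each
world_size = nnodes * nproc_per_node           # 8 processes in total
node_rank, local_rank = 1, 3                   # last GPU on the second node
global_rank = node_rank * nproc_per_node + local_rank
print(world_size, global_rank)                 # 8 7
```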
+ +Referring to: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +""" + +import argparse +import logging +import os +import sys +from glob import glob + +import nibabel as nib +import numpy as np +import torch +import torch.distributed as dist +from ignite.metrics import Accuracy +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data.distributed import DistributedSampler + +import monai +from monai.data import DataLoader, Dataset, create_test_image_3d +from monai.engines import SupervisedTrainer +from monai.handlers import CheckpointSaver, LrScheduleHandler, StatsHandler +from monai.inferers import SimpleInferer +from monai.transforms import ( + Activationsd, + AsChannelFirstd, + AsDiscreted, + Compose, + KeepLargestConnectedComponentd, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def train(args): + if args.local_rank == 0 and not os.path.exists(args.dir): + # create 40 random image, mask paris for training + print(f"generating synthetic data to {args.dir} (this may take a while)") + os.makedirs(args.dir) + # set random seed to generate same random data for every node + np.random.seed(seed=0) + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(args.dir, f"seg{i:d}.nii.gz")) + + # initialize the distributed training process, every GPU runs in a process + dist.init_process_group(backend="nccl", init_method="env://") + + images = sorted(glob(os.path.join(args.dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(args.dir, "seg*.nii.gz"))) + train_files = [{"image": img, "label": seg} for img, seg in zip(images, segs)] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys=["image", "label"], channel_dim=-1), + ScaleIntensityd(keys="image"), + RandCropByPosNegLabeld( + keys=["image", "label"], label_key="label", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["image", "label"]), + ] + ) + + # create a training data loader + train_ds = Dataset(data=train_files, transform=train_transforms) + # create a training data sampler + train_sampler = DistributedSampler(train_ds) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = DataLoader( + train_ds, + batch_size=2, + shuffle=False, + num_workers=2, + pin_memory=True, + sampler=train_sampler, + ) + + # create UNet, DiceLoss and Adam optimizer + device = torch.device(f"cuda:{args.local_rank}") + net = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss = monai.losses.DiceLoss(sigmoid=True).to(device) + opt = torch.optim.Adam(net.parameters(), 1e-3) + lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.1) + # wrap the model with DistributedDataParallel module + net = DistributedDataParallel(net, device_ids=[args.local_rank]) + + train_post_transforms = Compose( + [ + Activationsd(keys="pred", sigmoid=True), + AsDiscreted(keys="pred", threshold_values=True), + KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]), + ] + ) + 
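As a quick illustration of what the `train_post_transforms` chain above does to raw network output, the toy snippet below runs a two-voxel logits tensor through the activation and discretization steps (the connected-component step is omitted for brevity; the tensor values are made up):

```python
import torch
from monai.transforms import Activationsd, AsDiscreted, Compose

toy = {"pred": torch.tensor([[2.0, -3.0]])}   # raw logits for two voxels
chain = Compose([
    Activationsd(keys="pred", sigmoid=True),
    AsDiscreted(keys="pred", threshold_values=True),
])
print(chain(toy)["pred"])                     # tensor([[1., 0.]]) -- sigmoid, then 0.5 threshold
```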
train_handlers = [ + LrScheduleHandler(lr_scheduler=lr_scheduler, print_lr=True), + ] + if dist.get_rank() == 0: + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + train_handlers.extend( + [ + StatsHandler(tag_name="train_loss", output_transform=lambda x: x["loss"]), + CheckpointSaver(save_dir="./runs/", save_dict={"net": net, "opt": opt}, save_interval=2), + ] + ) + + trainer = SupervisedTrainer( + device=device, + max_epochs=5, + train_data_loader=train_loader, + network=net, + optimizer=opt, + loss_function=loss, + inferer=SimpleInferer(), + # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation + amp=True if monai.config.get_torch_version_tuple() >= (1, 6) else False, + post_transform=train_post_transforms, + key_train_metric={"train_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]), device=device)}, + train_handlers=train_handlers, + ) + trainer.run() + dist.destroy_process_group() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="./testdata", type=str, help="directory to create random data") + # must parse the command-line argument: ``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by DDP + parser.add_argument("--local_rank", type=int) + args = parser.parse_args() + + train(args=args) + + +# usage example(refer to https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py): + +# python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_PER_NODE +# --nnodes=NUM_NODES --node_rank=INDEX_CURRENT_NODE +# --master_addr="192.168.1.1" --master_port=1234 +# unet_training_workflows.py -d DIR_OF_TESTDATA + +if __name__ == "__main__": + main() diff --git a/fast_training_tutorial.ipynb b/acceleration/fast_training_tutorial.ipynb similarity index 99% rename from fast_training_tutorial.ipynb rename to acceleration/fast_training_tutorial.ipynb index 88bf432797..962d4b74cd 100644 --- a/fast_training_tutorial.ipynb +++ b/acceleration/fast_training_tutorial.ipynb @@ -18,7 +18,7 @@ "\n", "It's modified from the Spleen 3D segmentation tutorial notebook, the Spleen dataset can be downloaded from http://medicaldecathlon.com/.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/fast_training_tutorial.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb)" ] }, { diff --git a/multi_gpu_test.ipynb b/acceleration/multi_gpu_test.ipynb similarity index 98% rename from multi_gpu_test.ipynb rename to acceleration/multi_gpu_test.ipynb index 73ebdfe8ab..292aea2f64 100644 --- a/multi_gpu_test.ipynb +++ b/acceleration/multi_gpu_test.ipynb @@ -6,7 +6,7 @@ "source": [ "# Multi GPU Test\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/multi_gpu_test.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/multi_gpu_test.ipynb)" ] }, { @@ -270,7 +270,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/transform_speed.ipynb b/acceleration/transform_speed.ipynb similarity index 99% 
rename from transform_speed.ipynb rename to acceleration/transform_speed.ipynb index 3246c87daf..f723da0dbb 100644 --- a/transform_speed.ipynb +++ b/acceleration/transform_speed.ipynb @@ -8,7 +8,7 @@ "\n", "The purpose of this notebook is to illustrate reading Nifti files and test speed of different methods.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/transform_speed.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/transform_speed.ipynb)" ] }, { @@ -489,7 +489,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/images/brats_tasks.png b/figures/brats_tasks.png similarity index 100% rename from images/brats_tasks.png rename to figures/brats_tasks.png diff --git a/images/models_ensemble.png b/figures/models_ensemble.png similarity index 100% rename from images/models_ensemble.png rename to figures/models_ensemble.png diff --git a/images/multi_transform_chains.png b/figures/multi_transform_chains.png similarity index 100% rename from images/multi_transform_chains.png rename to figures/multi_transform_chains.png diff --git a/3d_image_transforms.ipynb b/modules/3d_image_transforms.ipynb similarity index 99% rename from 3d_image_transforms.ipynb rename to modules/3d_image_transforms.ipynb index 221342eb1d..526db53cfb 100644 --- a/3d_image_transforms.ipynb +++ b/modules/3d_image_transforms.ipynb @@ -8,7 +8,7 @@ "\n", "This notebook introduces you MONAI's transformation module for 3D images.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/3d_image_transforms.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/3d_image_transforms.ipynb)" ] }, { @@ -811,7 +811,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/dynunet_tutorial.ipynb b/modules/dynunet_tutorial.ipynb similarity index 99% rename from dynunet_tutorial.ipynb rename to modules/dynunet_tutorial.ipynb index 415c6b0839..d5568344ca 100644 --- a/dynunet_tutorial.ipynb +++ b/modules/dynunet_tutorial.ipynb @@ -13,7 +13,7 @@ "\n", "`nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation `\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/nnunet_tutorial.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/nnunet_tutorial.ipynb)" ] }, { diff --git a/integrate_3rd_party_transforms.ipynb b/modules/integrate_3rd_party_transforms.ipynb similarity index 99% rename from integrate_3rd_party_transforms.ipynb rename to modules/integrate_3rd_party_transforms.ipynb index 79aab6225a..9e7e0445c5 100644 --- a/integrate_3rd_party_transforms.ipynb +++ b/modules/integrate_3rd_party_transforms.ipynb @@ -9,7 +9,7 @@ "This tutorial shows how to integrate 3rd party transforms into a MONAI program. 
\n", "Mainly showing transforms from `BatchGenerator`, `TorchIO`, `Rising` and `ITK`.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/integrate_3rd_party_transforms.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/integrate_3rd_party_transforms.ipynb)" ] }, { @@ -521,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/load_medical_images.ipynb b/modules/load_medical_images.ipynb similarity index 99% rename from load_medical_images.ipynb rename to modules/load_medical_images.ipynb index f0af97b1b0..ae88a15597 100644 --- a/load_medical_images.ipynb +++ b/modules/load_medical_images.ipynb @@ -8,7 +8,7 @@ "\n", "This notebook introduces how to easily load different formats of medical images in MONAI and execute many additional operations.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/load_medical_images.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/load_medical_images.ipynb)" ] }, { diff --git a/mednist_GAN_tutorial.ipynb b/modules/mednist_GAN_tutorial.ipynb similarity index 99% rename from mednist_GAN_tutorial.ipynb rename to modules/mednist_GAN_tutorial.ipynb index d9dafb5f9a..6335195bfb 100644 --- a/mednist_GAN_tutorial.ipynb +++ b/modules/mednist_GAN_tutorial.ipynb @@ -14,7 +14,7 @@ "* Defining the networks\n", "* Training and evaluation\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/mednist_GAN_tutorial.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/mednist_GAN_tutorial.ipynb)" ] }, { @@ -497,7 +497,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/mednist_GAN_workflow.ipynb b/modules/mednist_GAN_workflow.ipynb similarity index 99% rename from mednist_GAN_workflow.ipynb rename to modules/mednist_GAN_workflow.ipynb index 5979eb598f..ffe8c13885 100644 --- a/mednist_GAN_workflow.ipynb +++ b/modules/mednist_GAN_workflow.ipynb @@ -23,7 +23,7 @@ "3. Run Training\n", "4. 
Evaluate Results\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/mednist_GAN_workflow.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/mednist_GAN_workflow.ipynb)" ] }, { @@ -649,7 +649,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/models_ensemble.ipynb b/modules/models_ensemble.ipynb similarity index 99% rename from models_ensemble.ipynb rename to modules/models_ensemble.ipynb index c9fd989626..75b8cc47dc 100644 --- a/models_ensemble.ipynb +++ b/modules/models_ensemble.ipynb @@ -13,13 +13,13 @@ "* Execute inference on the test data with all the K models.\n", "* Compute the average values with weights or vote the most common value as the final result.\n", "

\n", - "models_ensemble\n", + "models_ensemble\n", "

\n", "\n", "MONAI provides `EnsembleEvaluator` and `MeanEnsemble`, `VoteEnsemble` post transforms. \n", "This tutorial shows how to leverage ensemble modules in MONAI to set up ensemble program.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/models_ensemble.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/models_ensemble.ipynb)" ] }, { @@ -579,7 +579,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/nifti_read_example.ipynb b/modules/nifti_read_example.ipynb similarity index 98% rename from nifti_read_example.ipynb rename to modules/nifti_read_example.ipynb index ba2029855b..78eacca151 100644 --- a/nifti_read_example.ipynb +++ b/modules/nifti_read_example.ipynb @@ -8,7 +8,7 @@ "\n", "The purpose of this notebook is to illustrate reading Nifti files and iterating over patches of the volumes loaded from them.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/nifti_read_example.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/nifti_read_example.ipynb)" ] }, { @@ -291,7 +291,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/post_transforms.ipynb b/modules/post_transforms.ipynb similarity index 99% rename from post_transforms.ipynb rename to modules/post_transforms.ipynb index 03cf16527c..36e0c09c74 100644 --- a/post_transforms.ipynb +++ b/modules/post_transforms.ipynb @@ -17,12 +17,12 @@ "\n", "A typical usage is to scale and concatenate 3 different intensity ranges of an input image:\n", "

\n", - "multi_transform_chains\n", + "multi_transform_chains\n", "

\n", "\n", "This tutorial shows several of above post transforms based on the model output of spleen segmentation.\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/post_transforms.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/post_transforms.ipynb)" ] }, { @@ -641,7 +641,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/public_datasets.ipynb b/modules/public_datasets.ipynb similarity index 99% rename from public_datasets.ipynb rename to modules/public_datasets.ipynb index 92075981d7..252e114f33 100644 --- a/public_datasets.ipynb +++ b/modules/public_datasets.ipynb @@ -15,7 +15,7 @@ "* Create training experiment with DecathlonDataset and workflow\n", "* Share other public data and add Dataset in MONAI\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/public_datasets.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/public_datasets.ipynb)" ] }, { @@ -743,7 +743,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/transforms_demo_2d.ipynb b/modules/transforms_demo_2d.ipynb similarity index 99% rename from transforms_demo_2d.ipynb rename to modules/transforms_demo_2d.ipynb index c8393d5338..eac2624bd7 100644 --- a/transforms_demo_2d.ipynb +++ b/modules/transforms_demo_2d.ipynb @@ -13,7 +13,7 @@ " \n", "Find out more in MONAI's wiki page: https://github.com/Project-MONAI/MONAI/wiki\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/Tutorials/blob/master/transforms_demo_2d.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Project-MONAI/tutorials/blob/master/acceleration/transforms_demo_2d.ipynb)" ] }, { @@ -431,7 +431,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.10" } }, "nbformat": 4, diff --git a/modules/workflows/gan_evaluation.py b/modules/workflows/gan_evaluation.py new file mode 100644 index 0000000000..8f7ebcfa0e --- /dev/null +++ b/modules/workflows/gan_evaluation.py @@ -0,0 +1,67 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +MONAI GAN Evaluation Example + Generate fake images from trained generator file. 
+ +""" + +import logging +import os +import sys +from glob import glob + +import torch + +import monai +from monai.data import png_writer +from monai.engines.utils import default_make_latent as make_latent +from monai.networks.nets import Generator +from monai.utils.misc import set_determinism + + +def save_generator_fakes(run_folder, g_output_tensor): + for i, image in enumerate(g_output_tensor): + filename = "gen-fake-%d.png" % i + save_path = os.path.join(run_folder, filename) + img_array = image[0].cpu().data.numpy() + png_writer.write_png(img_array, save_path, scale=255) + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + set_determinism(12345) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # load generator + network_filepath = glob("./model_out/*.pth")[0] + data = torch.load(network_filepath) + latent_size = 64 + gen_net = Generator( + latent_shape=latent_size, start_shape=(latent_size, 8, 8), channels=[32, 16, 8, 1], strides=[2, 2, 2, 1] + ) + gen_net.conv.add_module("activation", torch.nn.Sigmoid()) + gen_net.load_state_dict(data["g_net"]) + gen_net = gen_net.to(device) + + # create fakes + output_dir = "./generated_images" + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + num_fakes = 10 + print("Generating %d fakes and saving in %s" % (num_fakes, output_dir)) + fake_latents = make_latent(num_fakes, latent_size).to(device) + save_generator_fakes(output_dir, gen_net(fake_latents)) + + +if __name__ == "__main__": + main() diff --git a/modules/workflows/gan_training.py b/modules/workflows/gan_training.py new file mode 100644 index 0000000000..b745db8da9 --- /dev/null +++ b/modules/workflows/gan_training.py @@ -0,0 +1,203 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +MONAI Generative Adversarial Networks Workflow Example + Sample script using MONAI to train a GAN to synthesize images from a latent code. + +## Get the dataset + MedNIST.tar.gz link: https://www.dropbox.com/s/5wwskxctvcxiuea/MedNIST.tar.gz + Extract tarball and set input_dir variable. GAN script trains using hand CT scan jpg images. 
+ + Dataset information available in MedNIST Tutorial + https://github.com/Project-MONAI/Tutorials/blob/master/mednist_tutorial.ipynb +""" + +import logging +import os +import sys + +import torch + +import monai +from monai.apps.utils import download_and_extract +from monai.data import CacheDataset, DataLoader, png_writer +from monai.engines import GanTrainer +from monai.engines.utils import GanKeys as Keys +from monai.engines.utils import default_make_latent as make_latent +from monai.handlers import CheckpointSaver, StatsHandler +from monai.networks import normal_init +from monai.networks.nets import Discriminator, Generator +from monai.transforms import ( + AddChannelD, + Compose, + LoadPNGD, + RandFlipD, + RandRotateD, + RandZoomD, + ScaleIntensityD, + ToTensorD, +) +from monai.utils.misc import set_determinism + + +def main(): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + set_determinism(12345) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # load real data + mednist_url = "https://www.dropbox.com/s/5wwskxctvcxiuea/MedNIST.tar.gz?dl=1" + md5_value = "0bc7306e7427e00ad1c5526a6677552d" + extract_dir = "data" + tar_save_path = os.path.join(extract_dir, "MedNIST.tar.gz") + download_and_extract(mednist_url, tar_save_path, extract_dir, md5_value) + hand_dir = os.path.join(extract_dir, "MedNIST", "Hand") + real_data = [{"hand": os.path.join(hand_dir, filename)} for filename in os.listdir(hand_dir)] + + # define real data transforms + train_transforms = Compose( + [ + LoadPNGD(keys=["hand"]), + AddChannelD(keys=["hand"]), + ScaleIntensityD(keys=["hand"]), + RandRotateD(keys=["hand"], range_x=15, prob=0.5, keep_size=True), + RandFlipD(keys=["hand"], spatial_axis=0, prob=0.5), + RandZoomD(keys=["hand"], min_zoom=0.9, max_zoom=1.1, prob=0.5), + ToTensorD(keys=["hand"]), + ] + ) + + # create dataset and dataloader + real_dataset = CacheDataset(real_data, train_transforms) + batch_size = 300 + real_dataloader = DataLoader(real_dataset, batch_size=batch_size, shuffle=True, num_workers=10) + + # define function to process batchdata for input into discriminator + def prepare_batch(batchdata): + """ + Process Dataloader batchdata dict object and return image tensors for D Inferer + """ + return batchdata["hand"] + + # define networks + disc_net = Discriminator( + in_shape=(1, 64, 64), channels=(8, 16, 32, 64, 1), strides=(2, 2, 2, 2, 1), num_res_units=1, kernel_size=5 + ).to(device) + + latent_size = 64 + gen_net = Generator( + latent_shape=latent_size, start_shape=(latent_size, 8, 8), channels=[32, 16, 8, 1], strides=[2, 2, 2, 1] + ) + + # initialize both networks + disc_net.apply(normal_init) + gen_net.apply(normal_init) + + # input images are scaled to [0,1] so enforce the same of generated outputs + gen_net.conv.add_module("activation", torch.nn.Sigmoid()) + gen_net = gen_net.to(device) + + # create optimizers and loss functions + learning_rate = 2e-4 + betas = (0.5, 0.999) + disc_opt = torch.optim.Adam(disc_net.parameters(), learning_rate, betas=betas) + gen_opt = torch.optim.Adam(gen_net.parameters(), learning_rate, betas=betas) + + disc_loss_criterion = torch.nn.BCELoss() + gen_loss_criterion = torch.nn.BCELoss() + real_label = 1 + fake_label = 0 + + def discriminator_loss(gen_images, real_images): + """ + The discriminator loss is calculated by comparing D + prediction for real and generated images. 
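A tiny illustration of the real/fake label convention used by the two loss functions here; the prediction values are made up, and the only point is that confident discriminator outputs on real images yield a small BCE loss:

```python
import torch

criterion = torch.nn.BCELoss()
pred_real = torch.tensor([[0.9], [0.8]])   # discriminator outputs for two real images
target = torch.full_like(pred_real, 1.0)   # real_label
print(criterion(pred_real, target))        # small loss: D is confident on real data
```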
+ + """ + real = real_images.new_full((real_images.shape[0], 1), real_label) + gen = gen_images.new_full((gen_images.shape[0], 1), fake_label) + + realloss = disc_loss_criterion(disc_net(real_images), real) + genloss = disc_loss_criterion(disc_net(gen_images.detach()), gen) + + return (genloss + realloss) / 2 + + def generator_loss(gen_images): + """ + The generator loss is calculated by determining how realistic + the discriminator classifies the generated images. + + """ + output = disc_net(gen_images) + cats = output.new_full(output.shape, real_label) + return gen_loss_criterion(output, cats) + + # initialize current run dir + run_dir = "model_out" + print("Saving model output to: %s " % run_dir) + + # create workflow handlers + handlers = [ + StatsHandler( + name="batch_training_loss", + output_transform=lambda x: {Keys.GLOSS: x[Keys.GLOSS], Keys.DLOSS: x[Keys.DLOSS]}, + ), + CheckpointSaver( + save_dir=run_dir, + save_dict={"g_net": gen_net, "d_net": disc_net}, + save_interval=10, + save_final=True, + epoch_level=True, + ), + ] + + # define key metric + key_train_metric = None + + # create adversarial trainer + disc_train_steps = 5 + num_epochs = 50 + + trainer = GanTrainer( + device, + num_epochs, + real_dataloader, + gen_net, + gen_opt, + generator_loss, + disc_net, + disc_opt, + discriminator_loss, + d_prepare_batch=prepare_batch, + d_train_steps=disc_train_steps, + latent_shape=latent_size, + key_train_metric=key_train_metric, + train_handlers=handlers, + ) + + # run GAN training + trainer.run() + + # Training completed, save a few random generated images. + print("Saving trained generator sample output.") + test_img_count = 10 + test_latents = make_latent(test_img_count, latent_size).to(device) + fakes = gen_net(test_latents) + for i, image in enumerate(fakes): + filename = "gen-fake-final-%d.png" % i + save_path = os.path.join(run_dir, filename) + img_array = image[0].cpu().data.numpy() + png_writer.write_png(img_array, save_path, scale=255) + + +if __name__ == "__main__": + main() diff --git a/modules/workflows/unet_evaluation_dict.py b/modules/workflows/unet_evaluation_dict.py new file mode 100644 index 0000000000..48f195ed01 --- /dev/null +++ b/modules/workflows/unet_evaluation_dict.py @@ -0,0 +1,121 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import logging
+import os
+import sys
+import tempfile
+from glob import glob
+
+import nibabel as nib
+import numpy as np
+import torch
+from ignite.metrics import Accuracy
+
+import monai
+from monai.data import create_test_image_3d
+from monai.engines import SupervisedEvaluator
+from monai.handlers import CheckpointLoader, MeanDice, SegmentationSaver, StatsHandler
+from monai.inferers import SlidingWindowInferer
+from monai.transforms import (
+    Activationsd,
+    AsChannelFirstd,
+    AsDiscreted,
+    Compose,
+    KeepLargestConnectedComponentd,
+    LoadNiftid,
+    ScaleIntensityd,
+    ToTensord,
+)
+
+
+def main(tempdir):
+    monai.config.print_config()
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+    # create a temporary directory and 5 random image, mask pairs
+    print(f"generating synthetic data to {tempdir} (this may take a while)")
+    for i in range(5):
+        im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)
+        n = nib.Nifti1Image(im, np.eye(4))
+        nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz"))
+        n = nib.Nifti1Image(seg, np.eye(4))
+        nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))
+
+    images = sorted(glob(os.path.join(tempdir, "im*.nii.gz")))
+    segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))
+    val_files = [{"image": img, "label": seg} for img, seg in zip(images, segs)]
+
+    # model file path
+    model_file = glob("./runs/net_key_metric*")[0]
+
+    # define transforms for image and segmentation
+    val_transforms = Compose(
+        [
+            LoadNiftid(keys=["image", "label"]),
+            AsChannelFirstd(keys=["image", "label"], channel_dim=-1),
+            ScaleIntensityd(keys="image"),
+            ToTensord(keys=["image", "label"]),
+        ]
+    )
+
+    # create a validation data loader
+    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
+    val_loader = monai.data.DataLoader(val_ds, batch_size=1, num_workers=4)
+
+    # create UNet
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    net = monai.networks.nets.UNet(
+        dimensions=3,
+        in_channels=1,
+        out_channels=1,
+        channels=(16, 32, 64, 128, 256),
+        strides=(2, 2, 2, 2),
+        num_res_units=2,
+    ).to(device)
+
+    val_post_transforms = Compose(
+        [
+            Activationsd(keys="pred", sigmoid=True),
+            AsDiscreted(keys="pred", threshold_values=True),
+            KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]),
+        ]
+    )
+    val_handlers = [
+        StatsHandler(output_transform=lambda x: None),
+        CheckpointLoader(load_path=model_file, load_dict={"net": net}),
+        SegmentationSaver(
+            output_dir="./runs/",
+            batch_transform=lambda batch: batch["image_meta_dict"],
+            output_transform=lambda output: output["pred"],
+        ),
+    ]
+
+    evaluator = SupervisedEvaluator(
+        device=device,
+        val_data_loader=val_loader,
+        network=net,
+        inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5),
+        post_transform=val_post_transforms,
+        key_val_metric={
+            "val_mean_dice": MeanDice(include_background=True, output_transform=lambda x: (x["pred"], x["label"]))
+        },
+        additional_metrics={"val_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]))},
+        val_handlers=val_handlers,
+        # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation
+        amp=True if monai.config.get_torch_version_tuple() >= (1, 6) else False,
+    )
+    evaluator.run()
+
+
+if __name__ == "__main__":
+    with tempfile.TemporaryDirectory() as tempdir:
+        main(tempdir)
diff --git a/modules/workflows/unet_training_dict.py b/modules/workflows/unet_training_dict.py
new file mode 100644
index 
0000000000..1f5dbb4c09 --- /dev/null +++ b/modules/workflows/unet_training_dict.py @@ -0,0 +1,179 @@ +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +import tempfile +from glob import glob + +import nibabel as nib +import numpy as np +import torch +from ignite.metrics import Accuracy + +import monai +from monai.data import create_test_image_3d +from monai.engines import SupervisedEvaluator, SupervisedTrainer +from monai.handlers import ( + CheckpointSaver, + LrScheduleHandler, + MeanDice, + StatsHandler, + TensorBoardImageHandler, + TensorBoardStatsHandler, + ValidationHandler, +) +from monai.inferers import SimpleInferer, SlidingWindowInferer +from monai.transforms import ( + Activationsd, + AsChannelFirstd, + AsDiscreted, + Compose, + KeepLargestConnectedComponentd, + LoadNiftid, + RandCropByPosNegLabeld, + RandRotate90d, + ScaleIntensityd, + ToTensord, +) + + +def main(tempdir): + monai.config.print_config() + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + # create a temporary directory and 40 random image, mask pairs + print(f"generating synthetic data to {tempdir} (this may take a while)") + for i in range(40): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz")) + + images = sorted(glob(os.path.join(tempdir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz"))) + train_files = [{"image": img, "label": seg} for img, seg in zip(images[:20], segs[:20])] + val_files = [{"image": img, "label": seg} for img, seg in zip(images[-20:], segs[-20:])] + + # define transforms for image and segmentation + train_transforms = Compose( + [ + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys=["image", "label"], channel_dim=-1), + ScaleIntensityd(keys="image"), + RandCropByPosNegLabeld( + keys=["image", "label"], label_key="label", spatial_size=[96, 96, 96], pos=1, neg=1, num_samples=4 + ), + RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[0, 2]), + ToTensord(keys=["image", "label"]), + ] + ) + val_transforms = Compose( + [ + LoadNiftid(keys=["image", "label"]), + AsChannelFirstd(keys=["image", "label"], channel_dim=-1), + ScaleIntensityd(keys="image"), + ToTensord(keys=["image", "label"]), + ] + ) + + # create a training data loader + train_ds = monai.data.CacheDataset(data=train_files, transform=train_transforms, cache_rate=0.5) + # use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training + train_loader = monai.data.DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=4) + # create a validation data loader + val_ds = monai.data.CacheDataset(data=val_files, transform=val_transforms, cache_rate=1.0) + val_loader = monai.data.DataLoader(val_ds, batch_size=1, num_workers=4) + + # create UNet, DiceLoss and 
Adam optimizer + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + net = monai.networks.nets.UNet( + dimensions=3, + in_channels=1, + out_channels=1, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + ).to(device) + loss = monai.losses.DiceLoss(sigmoid=True) + opt = torch.optim.Adam(net.parameters(), 1e-3) + lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.1) + + val_post_transforms = Compose( + [ + Activationsd(keys="pred", sigmoid=True), + AsDiscreted(keys="pred", threshold_values=True), + KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]), + ] + ) + val_handlers = [ + StatsHandler(output_transform=lambda x: None), + TensorBoardStatsHandler(log_dir="./runs/", output_transform=lambda x: None), + TensorBoardImageHandler( + log_dir="./runs/", + batch_transform=lambda x: (x["image"], x["label"]), + output_transform=lambda x: x["pred"], + ), + CheckpointSaver(save_dir="./runs/", save_dict={"net": net}, save_key_metric=True), + ] + + evaluator = SupervisedEvaluator( + device=device, + val_data_loader=val_loader, + network=net, + inferer=SlidingWindowInferer(roi_size=(96, 96, 96), sw_batch_size=4, overlap=0.5), + post_transform=val_post_transforms, + key_val_metric={ + "val_mean_dice": MeanDice(include_background=True, output_transform=lambda x: (x["pred"], x["label"])) + }, + additional_metrics={"val_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]))}, + val_handlers=val_handlers, + # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP evaluation + amp=True if monai.config.get_torch_version_tuple() >= (1, 6) else False, + ) + + train_post_transforms = Compose( + [ + Activationsd(keys="pred", sigmoid=True), + AsDiscreted(keys="pred", threshold_values=True), + KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]), + ] + ) + train_handlers = [ + LrScheduleHandler(lr_scheduler=lr_scheduler, print_lr=True), + ValidationHandler(validator=evaluator, interval=2, epoch_level=True), + StatsHandler(tag_name="train_loss", output_transform=lambda x: x["loss"]), + TensorBoardStatsHandler(log_dir="./runs/", tag_name="train_loss", output_transform=lambda x: x["loss"]), + CheckpointSaver(save_dir="./runs/", save_dict={"net": net, "opt": opt}, save_interval=2, epoch_level=True), + ] + + trainer = SupervisedTrainer( + device=device, + max_epochs=5, + train_data_loader=train_loader, + network=net, + optimizer=opt, + loss_function=loss, + inferer=SimpleInferer(), + post_transform=train_post_transforms, + key_train_metric={"train_acc": Accuracy(output_transform=lambda x: (x["pred"], x["label"]))}, + train_handlers=train_handlers, + # if no FP16 support in GPU or PyTorch version < 1.6, will not enable AMP training + amp=True if monai.config.get_torch_version_tuple() >= (1, 6) else False, + ) + trainer.run() + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory() as tempdir: + main(tempdir) diff --git a/runexamples.sh b/runexamples.sh new file mode 100755 index 0000000000..7035a79b5a --- /dev/null +++ b/runexamples.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Copyright 2020 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +# script for running the examples + + +# install necessary packages +pip install numpy +pip install torch +pip install 'monai[itk, nibabel, pillow]' + + +# home directory +homedir="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +TEMP_LOG="temp.txt" + +cd "$homedir" +find "$homedir" -type f -name $TEMP_LOG -delete + + +# download data to specific directory +if [ -e "./testing_ixi_t1.tar.gz" ] && [ -d "./workspace/" ]; then + echo "1" >> $TEMP_LOG +else + wget https://www.dropbox.com/s/y890gb6axzzqff5/testing_ixi_t1.tar.gz?dl=1 + mv testing_ixi_t1.tar.gz?dl=1 testing_ixi_t1.tar.gz + mkdir -p ./workspace/data/medical/ixi/IXI-T1/ + tar -C ./workspace/data/medical/ixi/IXI-T1/ -xf testing_ixi_t1.tar.gz +fi + + +# run training files in 3d_classification/torch +for file in "3d_classification/torch"/*train* +do + python "$file" +done + +# check training files generated from 3d_classification/torch +[ -e "./best_metric_model_classification3d_array.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d classification torch: model file not generated" | tee $TEMP_LOG && exit 0) +[ -e "./best_metric_model_classification3d_dict.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d classification torch: model file not generated" | tee $TEMP_LOG && exit 0) + +# run eval files in 3d_classification/torch +for file in "3d_classification/torch"/*eval* +do + python "$file" +done + + +# run training files in 3d_classification/ignite +for file in "3d_classification/ignite"/*train* +do + python "$file" +done + +# check training files generated from 3d_classification/ignite +[ -e "./runs_array/net_checkpoint_20.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d classification ignite: model file not generated" | tee $TEMP_LOG && exit 0) +[ -e "./runs_dict/net_checkpoint_20.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d classification ignite: model file not generated" | tee $TEMP_LOG && exit 0) + +# run eval files in 3d_classification/ignite +for file in "3d_classification/ignite"/*eval* +do + python "$file" +done + + +# run training files in 2d_segmentation/torch +for file in "2d_segmentation/torch"/*train* +do + python "$file" +done + +# check training files generated from 2d_segmentation/torch +[ -e "./best_metric_model_segmentation2d_array.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 2d segmentation torch: model file not generated" | tee $TEMP_LOG && exit 0) +[ -e "./best_metric_model_segmentation2d_dict.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 2d segmentation torch: model file not generated" | tee $TEMP_LOG && exit 0) + +# run eval files in 2d_segmentation/torch +for file in "2d_segmentation/torch"/*eval* +do + python "$file" +done + + +# run training files in 3d_segmentation/torch +for file in "3d_segmentation/torch"/*train* +do + python "$file" +done + +# check training files generated from 3d_segmentation/torch +[ -e "./best_metric_model_segmentation3d_array.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d segmentation torch: model file not generated" | tee $TEMP_LOG && exit 0) +[ -e "./best_metric_model_segmentation3d_dict.pth" ] && echo "1" >> $TEMP_LOG || 
(echo "examples 3d segmentation torch: model file not generated" | tee $TEMP_LOG && exit 0)
+
+# run eval files in 3d_segmentation/torch
+for file in "3d_segmentation/torch"/*eval*
+do
+    python "$file"
+done
+
+
+# run training files in 3d_segmentation/ignite
+for file in "3d_segmentation/ignite"/*train*
+do
+    python "$file"
+done
+
+# check training files generated from 3d_segmentation/ignite
+[ -e "./runs_array/net_checkpoint_100.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d segmentation ignite: model file not generated" | tee $TEMP_LOG && exit 0)
+[ -e "./runs_dict/net_checkpoint_50.pth" ] && echo "1" >> $TEMP_LOG || (echo "examples 3d segmentation ignite: model file not generated" | tee $TEMP_LOG && exit 0)
+
+# run eval files in 3d_segmentation/ignite
+for file in "3d_segmentation/ignite"/*eval*
+do
+    python "$file"
+done
+
+
+# run training file in modules/workflows
+for file in "modules/workflows"/*train*
+do
+    python "$file"
+done
+
+# check training file generated from modules/workflows
+# use compgen so the glob is expanded; [ -e "pattern*" ] would only test for a file literally named with "*"
+compgen -G "./runs/net_key_metric*.pth" > /dev/null && echo "1" >> $TEMP_LOG || (echo "examples supervised workflows: model file not generated" | tee $TEMP_LOG && exit 0)
+compgen -G "./model_out/*.pth" > /dev/null && echo "1" >> $TEMP_LOG || (echo "examples GAN workflows: model file not generated" | tee $TEMP_LOG && exit 0)
+
+# run eval file in modules/workflows
+for file in "modules/workflows"/*eval*
+do
+    python "$file"
+done
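
For context (not part of the diff above): a minimal way to exercise runexamples.sh locally might look like the sketch below. The virtual-environment name and the captured log file name are illustrative assumptions; only temp.txt (the script's TEMP_LOG) comes from the script itself.

# hypothetical local invocation -- ".venv" and "runexamples_full.log" are assumed names, not part of the PR
python3 -m venv .venv && source .venv/bin/activate
bash ./runexamples.sh 2>&1 | tee runexamples_full.log
# each passing check appends a "1" to temp.txt (TEMP_LOG in the script), so a quick tally of passed checks is:
grep -c "^1$" temp.txt

Because the script installs packages and downloads the IXI test archive before running every example, a GPU machine with network access is the intended environment; on CPU-only machines the same commands should still run, just slowly.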