Merge branch 'main' into vision-mamba

anwai98 · anwai98 · commit 55fba226d232 · 2024-03-20T00:09:31.000+01:00
diff --git a/scripts/datasets/check_dynamicnuclearnet.py b/scripts/datasets/check_dynamicnuclearnet.py
@@ -0,0 +1,20 @@
+from torch_em.util.debug import check_loader
+from torch_em.data.datasets import get_dynamicnuclearnet_loader
+
+
+# Set this path to where you have downloaded the dynamicnuclearnet data.
+DYNAMICNUCLEARNET_ROOT = "/home/anwai/data/deepcell/"
+
+
+# NOTE: the DynamicNuclearNet data cannot be downloaded automatically.
+# You need to download it yourself from https://datasets.deepcell.org/data
+def check_dynamicnuclearnet():
+    """Build the train loader for 512 x 512 patches and pass it to `check_loader`."""
+    loader = get_dynamicnuclearnet_loader(
+        DYNAMICNUCLEARNET_ROOT, "train", patch_shape=(512, 512), batch_size=2, download=False
+    )
+    check_loader(loader, 10, instance_labels=True, rgb=False)
+
+
+if __name__ == "__main__":
+    check_dynamicnuclearnet()
diff --git a/torch_em/__version__.py b/torch_em/__version__.py
@@ -1 +1 @@
-__version__ = "0.6.1"
+__version__ = "0.6.2"
diff --git a/torch_em/data/datasets/__init__.py b/torch_em/data/datasets/__init__.py
@@ -6,6 +6,7 @@
 from .ctc import get_ctc_segmentation_loader, get_ctc_segmentation_dataset
 from .deepbacs import get_deepbacs_loader, get_deepbacs_dataset
 from .dsb import get_dsb_loader, get_dsb_dataset
+from .dynamicnuclearnet import get_dynamicnuclearnet_loader, get_dynamicnuclearnet_dataset
 from .hpa import get_hpa_segmentation_loader, get_hpa_segmentation_dataset
 from .isbi2012 import get_isbi_loader, get_isbi_dataset
 from .kasthuri import get_kasthuri_loader, get_kasthuri_dataset
diff --git a/torch_em/data/datasets/ctc.py b/torch_em/data/datasets/ctc.py
@@ -48,7 +48,7 @@ def get_ctc_url_and_checksum(dataset_name, split):
 def _require_ctc_dataset(path, dataset_name, download, split):
     dataset_names = list(CTC_CHECKSUMS["train"].keys())
     if dataset_name not in dataset_names:
-        raise ValueError(f"Inalid dataset: {dataset_name}, choose one of {dataset_names}.")
+        raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.")
 
     data_path = os.path.join(path, split, dataset_name)
 
diff --git a/torch_em/data/datasets/dynamicnuclearnet.py b/torch_em/data/datasets/dynamicnuclearnet.py
@@ -0,0 +1,93 @@
+import os
+from tqdm import tqdm
+from glob import glob
+
+import z5py
+import numpy as np
+import pandas as pd
+
+import torch_em
+
+from . import util
+
+
+# Automatic download is currently not possible, because of authentication
+URL = None  # TODO: here - https://datasets.deepcell.org/data
+
+
+def _create_split(path, split):
+    """Convert `<split>.npz` into one zarr file per image under `path/<split>` and delete the npz."""
+    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
+    split_folder = os.path.join(path, split)
+    os.makedirs(split_folder, exist_ok=True)
+
+    # Use the context manager so the npz archive is closed before it is removed below.
+    with np.load(split_file, allow_pickle=True) as data:
+        x, y = data["X"], data["y"]
+
+    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
+        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
+        image_channel = im[..., 0]
+        label_channel = label[..., 0]
+        chunks = image_channel.shape
+        with z5py.File(out_path, "a") as f:
+            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
+            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)
+
+    os.remove(split_file)
+
+
+def _create_dataset(path, zip_path):
+    """Unpack the zip archive (keeping it) and convert the npz file of every split to zarr files."""
+    util.unzip(zip_path, path, remove=False)
+    data_folder = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0")
+    split_names = ("train", "val", "test")
+    assert all(os.path.exists(os.path.join(data_folder, f"{name}.npz")) for name in split_names)
+    for name in split_names:
+        _create_split(path, name)
+
+
+def get_dynamicnuclearnet_dataset(
+    path, split, patch_shape, download=False, **kwargs
+):
+    """Dataset for the segmentation of cell nuclei imaged with fluorescence microscopy.
+
+    This dataset is from the publication https://doi.org/10.1101/803205.
+    Please cite it if you use this dataset for a publication.
+
+    The data cannot be downloaded automatically, so `download` is currently ignored: the zip
+    archive has to be placed in `path` by hand. `kwargs` go to `default_segmentation_dataset`.
+    """
+    splits = ["train", "val", "test"]
+    assert split in splits, f"Invalid split: {split}, choose one of {splits}."
+
+    # Convert the data unless all split folders exist already; this requires the manually downloaded zip.
+    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
+    if not all(os.path.exists(os.path.join(path, name)) for name in splits):
+        if not os.path.exists(zip_path):
+            raise RuntimeError(
+                "We do not support automatic download for the dynamic nuclear net dataset yet. "
+                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
+            )
+        _create_dataset(path, zip_path)
+
+    split_folder = os.path.join(path, split)
+    assert os.path.exists(split_folder)
+    # Sort the files because glob returns them in arbitrary order; this keeps the file order reproducible.
+    data_path = sorted(glob(os.path.join(split_folder, "*.zarr")))
+    assert len(data_path) > 0, f"Could not find any zarr files in {split_folder}."
+    return torch_em.default_segmentation_dataset(
+        data_path, "raw", data_path, "labels", patch_shape, is_seg_dataset=True, ndim=2, **kwargs
+    )
+
+
+def get_dynamicnuclearnet_loader(
+    path, split, patch_shape, batch_size, download=False, **kwargs
+):
+    """Dataloader for the segmentation of cell nuclei for 5 different cell lines in fluorescence microscopes.
+
+    See `get_dynamicnuclearnet_dataset` for details.
+    """
+    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
+    dataset = get_dynamicnuclearnet_dataset(path, split, patch_shape, download, **ds_kwargs)
+    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
diff --git a/torch_em/loss/dice.py b/torch_em/loss/dice.py
@@ -89,6 +89,7 @@ def __init__(self, channelwise=True, eps=1e-7, reduce_channel="sum"):
         super().__init__()
         self.channelwise = channelwise
         self.eps = eps
+        self.reduce_channel = reduce_channel
 
         # all torch_em classes should store init kwargs to easily recreate the init call
         self.init_kwargs = {"channelwise": channelwise, "eps": self.eps, "reduce_channel": self.reduce_channel}
diff --git a/torch_em/model/unetr.py b/torch_em/model/unetr.py
@@ -6,7 +6,7 @@
 import torch.nn.functional as F
 
 from .unet import Decoder, ConvBlock2d, Upsampler2d
-from .vit import get_vision_transformer, ViT_MAE, ViT_Sam
+from .vit import get_vision_transformer
 
 try:
     from micro_sam.util import get_sam_model
@@ -244,7 +244,10 @@ def forward(self, x):
 
         encoder_outputs = self.encoder(x)
 
-        if isinstance(self.encoder, ViT_Sam) or isinstance(self.encoder, ViT_MAE):
+        if isinstance(encoder_outputs[-1], list):
+            # `encoder_outputs` can be arranged in only two forms:
+            #   - either we only return the image embeddings
+            #   - or, we return the image embeddings and the "list" of global attention layers
             z12, from_encoder = encoder_outputs
         else:
             z12 = encoder_outputs
diff --git a/torch_em/self_training/fix_match.py b/torch_em/self_training/fix_match.py
@@ -136,7 +136,7 @@ def __init__(
     # functionality for saving checkpoints and initialization
     #
 
-    def save_checkpoint(self, name, best_metric, **extra_save_dict):
+    def save_checkpoint(self, name, current_metric, best_metric, **extra_save_dict):
         train_loader_kwargs = get_constructor_arguments(self.train_loader)
         val_loader_kwargs = get_constructor_arguments(self.val_loader)
         extra_state = {
@@ -152,7 +152,7 @@ def save_checkpoint(self, name, best_metric, **extra_save_dict):
             },
         }
         extra_state.update(**extra_save_dict)
-        super().save_checkpoint(name, best_metric, **extra_state)
+        super().save_checkpoint(name, current_metric, best_metric, **extra_state)
 
     # distribution alignment - encourages the distribution of the model's generated pseudo labels to match the marginal
     #                          distribution of pseudo labels from the source transfer
diff --git a/torch_em/self_training/mean_teacher.py b/torch_em/self_training/mean_teacher.py
@@ -171,7 +171,7 @@ def _momentum_update(self):
     # functionality for saving checkpoints and initialization
     #
 
-    def save_checkpoint(self, name, best_metric, **extra_save_dict):
+    def save_checkpoint(self, name, current_metric, best_metric, **extra_save_dict):
         train_loader_kwargs = get_constructor_arguments(self.train_loader)
         val_loader_kwargs = get_constructor_arguments(self.val_loader)
         extra_state = {
@@ -188,7 +188,7 @@ def save_checkpoint(self, name, best_metric, **extra_save_dict):
             },
         }
         extra_state.update(**extra_save_dict)
-        super().save_checkpoint(name, best_metric, **extra_state)
+        super().save_checkpoint(name, current_metric, best_metric, **extra_state)
 
     def load_checkpoint(self, checkpoint="best"):
         save_dict = super().load_checkpoint(checkpoint)
diff --git a/torch_em/trainer/default_trainer.py b/torch_em/trainer/default_trainer.py
@@ -458,14 +458,15 @@ def _initialize(self, iterations, load_from_checkpoint, epochs=None):
         best_metric = np.inf
         return best_metric
 
-    def save_checkpoint(self, name, best_metric, train_time=0.0, **extra_save_dict):
+    def save_checkpoint(self, name, current_metric, best_metric, train_time=0.0, **extra_save_dict):
         save_path = os.path.join(self.checkpoint_folder, f"{name}.pt")
         extra_init_dict = extra_save_dict.pop("init", {})
         save_dict = {
             "iteration": self._iteration,
             "epoch": self._epoch,
             "best_epoch": self._best_epoch,
             "best_metric": best_metric,
+            "current_metric": current_metric,
             "model_state": self.model.state_dict(),
             "optimizer_state": self.optimizer.state_dict(),
             "init": self.init_data | extra_init_dict,
@@ -494,6 +495,7 @@ def load_checkpoint(self, checkpoint="best"):
         self._epoch = save_dict["epoch"]
         self._best_epoch = save_dict["best_epoch"]
         self.best_metric = save_dict["best_metric"]
+        self.current_metric = save_dict["current_metric"]
         self.train_time = save_dict.get("train_time", 0.0)
 
         model_state = save_dict["model_state"]
@@ -573,14 +575,16 @@ def fit(self, iterations=None, load_from_checkpoint=None, epochs=None, save_ever
             if current_metric < best_metric:
                 best_metric = current_metric
                 self._best_epoch = self._epoch
-                self.save_checkpoint("best", best_metric, train_time=total_train_time)
+                self.save_checkpoint("best", current_metric, best_metric, train_time=total_train_time)
 
             # save this checkpoint as the latest checkpoint
-            self.save_checkpoint("latest", best_metric, train_time=total_train_time)
+            self.save_checkpoint("latest", current_metric, best_metric, train_time=total_train_time)
 
             # if we save after every k-th epoch then check if we need to save now
             if save_every_kth_epoch is not None and (self._epoch + 1) % save_every_kth_epoch == 0:
-                self.save_checkpoint(f"epoch-{self._epoch + 1}", best_metric, train_time=total_train_time)
+                self.save_checkpoint(
+                    f"epoch-{self._epoch + 1}", current_metric, best_metric, train_time=total_train_time
+                )
 
             # if early stopping has been specified then check if the stopping condition is met
             if self.early_stopping is not None:
diff --git a/torch_em/trainer/spoco_trainer.py b/torch_em/trainer/spoco_trainer.py
@@ -32,8 +32,10 @@ def _momentum_update(self):
         for param_model, param_teacher in zip(self.model.parameters(), self.model2.parameters()):
             param_teacher.data = param_teacher.data * self.momentum + param_model.data * (1. - self.momentum)
 
-    def save_checkpoint(self, name, best_metric, **extra_save_dict):
-        super().save_checkpoint(name, best_metric, model2_state=self.model2.state_dict(), **extra_save_dict)
+    def save_checkpoint(self, name, current_metric, best_metric, **extra_save_dict):
+        super().save_checkpoint(
+            name, current_metric, best_metric, model2_state=self.model2.state_dict(), **extra_save_dict
+        )
 
     def load_checkpoint(self, checkpoint="best"):
         save_dict = super().load_checkpoint(checkpoint)
diff --git a/torch_em/util/modelzoo.py b/torch_em/util/modelzoo.py
@@ -125,8 +125,6 @@ def _write_depedencies(export_folder, dependencies):
         ver = torch.__version__
         major, minor = list(map(int, ver.split(".")[:2]))
         assert major in (1, 2)
-        if major == 2:
-            warn("Modelzoo functionality is not fully tested for PyTorch 2")
         # the torch zip layout changed for a few versions:
         torch_min_version = "1.0"
         if minor > 6 and minor < 10:
@@ -363,7 +361,7 @@ def _get_axes(axis):
         if std is not None:
             preprocessing[0]["kwargs"]["std"] = std
 
-    elif name == "torch_em.transform.normalize_percentile":
+    elif name == "torch_em.transform.raw.normalize_percentile":
 
         lower, upper = kwargs.get("lower", 1.0), kwargs.get("upper", 99.0)
         axes = _get_axes(kwargs.get("axis", None))

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.6.1"`
	`1`	`+__version__ = "0.6.2"`