Skip to content

Commit ef05d9c

Browse files
committed
Add inference script
1 parent 31ba444 commit ef05d9c

File tree

2 files changed: +46 −2 lines changed

experiments/vision-mamba/run_livecell.py

+28-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
import os
22
import argparse
3+
from glob import glob
4+
5+
import imageio.v3 as imageio
6+
7+
import torch
38

49
import torch_em
510
from torch_em.data.datasets import get_livecell_loader
@@ -55,8 +60,27 @@ def run_livecell_training(args):
5560
trainer.fit(iterations=int(args.iterations))
5661

5762

63+
def run_livecell_inference(args):
    """Run inference with the trained ViM-UNet model on the LIVECell test images.

    Loads the model from ``args.checkpoint`` and runs a forward pass over every
    image in the LIVECell test split.

    NOTE(review): predictions are computed but neither saved nor evaluated —
    presumably follow-up code will persist them; confirm intended behavior.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # the vision-mamba + decoder (UNet-based) model
    model = get_vimunet_model(checkpoint=args.checkpoint)
    # inference mode: disable dropout / use running batch-norm statistics
    model.eval()

    for image_path in glob(os.path.join(ROOT, "data", "livecell", "images", "livecell_test_images", "*")):
        image = imageio.imread(image_path)
        # add batch and channel axes — assumes a single-channel 2d image
        # of shape (H, W) -> (1, 1, H, W); TODO confirm against the dataset
        tensor_image = torch.from_numpy(image)[None, None].to(device)

        # no_grad avoids building the autograd graph: without it, inference
        # over the whole test split accumulates graph memory needlessly
        with torch.no_grad():
            predictions = model(tensor_image)
        predictions = predictions.squeeze().detach().cpu().numpy()
5877
def main(args):
    """Dispatch to training and/or inference based on the parsed CLI flags.

    Raises:
        ValueError: if ``--predict`` is given without ``--checkpoint``.
    """
    if args.train:
        run_livecell_training(args)

    if args.predict:
        # explicit exception instead of `assert`: asserts are stripped when
        # Python runs with -O, which would silently skip this validation
        if args.checkpoint is None:
            raise ValueError("Provide the checkpoint path to the trained model.")
        run_livecell_inference(args)
6084

6185

6286
if __name__ == "__main__":
@@ -65,5 +89,8 @@ def main(args):
6589
parser.add_argument("--iterations", type=int, default=1e4)
6690
parser.add_argument("-s", "--save_root", type=str, default=os.path.join(ROOT, "experiments", "vision-mamba"))
6791
parser.add_argument("--pretrained", action="store_true")
92+
parser.add_argument("--train", action="store_true")
93+
parser.add_argument("--predict", action="store_true")
94+
parser.add_argument("-c", "--checkpoint", default=None, type=str)
6895
args = parser.parse_args()
6996
main(args)

experiments/vision-mamba/vimunet.py

+18-1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44

55
# pretrained model weights: vim_t - https://huggingface.co/hustvl/Vim-tiny/blob/main/vim_tiny_73p1.pth
66

7+
from collections import OrderedDict
8+
79
import torch
810

911
from torch_em.model import UNETR
@@ -101,6 +103,7 @@ def get_vimunet_model(device=None, checkpoint=None):
101103
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102104

103105
encoder = ViM(
106+
img_size=1024,
104107
patch_size=16,
105108
embed_dim=192,
106109
depth=24,
@@ -119,7 +122,21 @@ def get_vimunet_model(device=None, checkpoint=None):
119122

120123
if checkpoint is not None:
121124
state = torch.load(checkpoint, map_location="cpu")
122-
encoder_state = state["model"]
125+
126+
if checkpoint.endswith(".pth"): # from Vim
127+
encoder_state = state["model"]
128+
129+
else: # from torch_em
130+
model_state = state["model_state"]
131+
132+
encoder_prefix = "encoder."
133+
encoder_state = []
134+
for k, v in model_state.items():
135+
if k.startswith(encoder_prefix):
136+
encoder_state.append((k[len(encoder_prefix):], v))
137+
138+
encoder_state = OrderedDict(encoder_state)
139+
123140
encoder.load_state_dict(encoder_state)
124141

125142
encoder.img_size = encoder.patch_embed.img_size[0]

0 commit comments

Comments (0)