|
| 1 | +import os |
| 2 | +from glob import glob |
| 3 | +from shutil import copyfile |
| 4 | + |
| 5 | +import torch_em |
| 6 | +from . import util |
| 7 | + |
| 8 | + |
# Base location of the Cell Tracking Challenge 2d training datasets.
_CTC_BASE_URL = "http://data.celltrackingchallenge.net/training-datasets"

# SHA256 checksums for the zip archive of each supported dataset.
CTC_CHECKSUMS = {
    "BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
    "BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
    "DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
    "Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
    "Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
    "Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
    "Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
    "Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
    "PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
    "PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
}

# Download URL per dataset; every archive lives under the same base folder,
# so the URLs are derived from the dataset names.
CTC_URLS = {name: f"{_CTC_BASE_URL}/{name}.zip" for name in CTC_CHECKSUMS}
| 34 | + |
| 35 | + |
def _require_ctc_dataset(path, dataset_name, download):
    """Download and unpack the given CTC dataset if it is not present yet.

    Args:
        path: Root folder under which all CTC datasets are stored.
        dataset_name: Name of the dataset; must be a key of CTC_URLS.
        download: Whether downloading the data is allowed.

    Returns:
        The folder containing the extracted dataset.

    Raises:
        ValueError: If dataset_name is not one of the supported datasets.
    """
    dataset_names = list(CTC_URLS.keys())
    if dataset_name not in dataset_names:
        # Fix: the error message previously read "Inalid".
        raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.")

    data_path = os.path.join(path, dataset_name)

    # Only download and unzip if the extracted dataset folder is missing.
    if not os.path.exists(data_path):
        url, checksum = CTC_URLS[dataset_name], CTC_CHECKSUMS[dataset_name]
        zip_path = os.path.join(path, f"{dataset_name}.zip")
        util.download_source(zip_path, url, download, checksum=checksum)
        util.unzip(zip_path, path, remove=True)

    return data_path
| 50 | + |
| 51 | + |
| 52 | +def _require_gt_images(data_path, splits): |
| 53 | + image_paths, label_paths = [], [] |
| 54 | + |
| 55 | + if isinstance(splits, str): |
| 56 | + splits = [splits] |
| 57 | + |
| 58 | + for split in splits: |
| 59 | + image_folder = os.path.join(data_path, split) |
| 60 | + assert os.path.join(image_folder), f"Cannot find split, {split} in {data_path}." |
| 61 | + |
| 62 | + label_folder = os.path.join(data_path, f"{split}_GT", "SEG") |
| 63 | + |
| 64 | + # copy over the images corresponding to the labeled frames |
| 65 | + label_image_folder = os.path.join(data_path, f"{split}_GT", "IM") |
| 66 | + os.makedirs(label_image_folder, exist_ok=True) |
| 67 | + |
| 68 | + this_label_paths = glob(os.path.join(label_folder, "*.tif")) |
| 69 | + for label_path in this_label_paths: |
| 70 | + fname = os.path.basename(label_path) |
| 71 | + image_label_path = os.path.join(label_image_folder, fname) |
| 72 | + if not os.path.exists(image_label_path): |
| 73 | + im_name = "t" + fname.lstrip("main_seg") |
| 74 | + image_path = os.path.join(image_folder, im_name) |
| 75 | + assert os.path.join(image_path), image_path |
| 76 | + copyfile(image_path, image_label_path) |
| 77 | + |
| 78 | + image_paths.append(label_image_folder) |
| 79 | + label_paths.append(label_folder) |
| 80 | + |
| 81 | + return image_paths, label_paths |
| 82 | + |
| 83 | + |
def get_ctc_segmentation_dataset(
    path,
    dataset_name,
    patch_shape,
    split=None,
    download=False,
    **kwargs,
):
    """Dataset for the cell tracking challenge segmentation data.

    This dataset provides access to the 2d segmentation datasets of the
    cell tracking challenge. If you use this data in your research please cite
    https://doi.org/10.1038/nmeth.4473

    Args:
        path: Root folder for storing the downloaded data.
        dataset_name: Name of the CTC dataset, one of the keys of CTC_URLS.
        patch_shape: The patch shape for sampling from the data.
        split: The split(s) to use, e.g. "01" or a list of splits.
            By default all splits with ground-truth annotations are used.
        download: Whether to download the data if it is not present.
        kwargs: Further keyword arguments for 'torch_em.default_segmentation_dataset'.
    """
    data_path = _require_ctc_dataset(path, dataset_name, download)

    if split is None:
        # Use all splits for which ground-truth annotations exist.
        splits = glob(os.path.join(data_path, "*_GT"))
        splits = [os.path.basename(split) for split in splits]
        # Strip the literal '_GT' suffix (rstrip would strip a character set).
        splits = [split[:-len("_GT")] for split in splits]
    else:
        # Fix: 'splits' was previously left undefined when a split was passed,
        # causing a NameError below.
        splits = split

    image_path, label_path = _require_gt_images(data_path, splits)

    kwargs = util.update_kwargs(kwargs, "ndim", 2)
    return torch_em.default_segmentation_dataset(
        image_path, "*.tif", label_path, "*.tif", patch_shape, is_seg_dataset=True, **kwargs
    )
| 111 | + |
| 112 | + |
def get_ctc_segmentation_loader(
    path,
    dataset_name,
    patch_shape,
    batch_size,
    split=None,
    download=False,
    **kwargs,
):
    """Dataloader for cell tracking challenge segmentation data.
    See 'get_ctc_segmentation_dataset' for details.
    """
    # Split the keyword arguments into those consumed by the dataset
    # and those consumed by the data loader.
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    segmentation_dataset = get_ctc_segmentation_dataset(
        path=path,
        dataset_name=dataset_name,
        patch_shape=patch_shape,
        split=split,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(segmentation_dataset, batch_size, **dataloader_kwargs)
0 commit comments