slegroux
diff --git a/‎nbs/models.conv.ipynb
+114-76 b/‎nbs/models.conv.ipynb
+114-76
diff --git a/‎nbs/models.core.ipynb
+1-1 b/‎nbs/models.core.ipynb
+1-1
@@ -32,7 +32,16 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[23:23:58] INFO - PyTorch version 2.5.1 available.\n",
+      "Seed set to 42\n"
+     ]
+    }
+   ],
    "source": [
     "#| export\n",
     "import torch.nn as nn\n",
@@ -96,11 +105,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[19:18:28] INFO - Init ImageDataModule for mnist\n",
-      "[19:18:32] INFO - loading dataset mnist with args () from split train\n",
-      "[19:18:40] INFO - loading dataset mnist with args () from split test\n",
-      "[19:18:42] INFO - split train into train/val [0.8, 0.2]\n",
-      "[19:18:43] INFO - train: 48000 val: 12000, test: 10000\n"
+      "[23:24:00] INFO - Init ImageDataModule for mnist\n",
+      "[23:24:05] INFO - loading dataset mnist with args () from split train\n",
+      "[23:24:12] INFO - loading dataset mnist with args () from split test\n",
+      "[23:24:15] INFO - split train into train/val [0.8, 0.2]\n",
+      "[23:24:15] INFO - train: 48000 val: 12000, test: 10000\n"
      ]
     }
    ],
@@ -325,7 +334,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
@@ -380,7 +389,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
@@ -452,7 +461,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L75){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L77){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer.forward\n",
        "\n",
@@ -468,7 +477,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L75){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L77){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer.forward\n",
        "\n",
@@ -507,7 +516,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[19:18:43] WARNING - setting conv bias back to False as Batchnorm is used\n"
+      "[23:24:16] WARNING - setting conv bias back to False as Batchnorm is used\n"
      ]
     },
     {
@@ -614,7 +623,7 @@
      "output_type": "stream",
      "text": [
       "Seed set to 42\n",
-      "[19:18:43] WARNING - setting conv bias back to False as Batchnorm is used\n"
+      "[23:24:16] WARNING - setting conv bias back to False as Batchnorm is used\n"
      ]
     },
     {
@@ -706,7 +715,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
@@ -761,7 +770,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
@@ -1048,60 +1057,112 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
        ">      ConvLayer (in_channels:int=3, out_channels:int=16, kernel_size:int=3,\n",
-       ">                 stride:int=2, bias:bool=True, normalization:Optional[Type[torc\n",
-       ">                 h.nn.modules.module.Module]]=<class\n",
+       ">                 stride:int=2, bias:bool=False, normalization:Optional[Type[tor\n",
+       ">                 ch.nn.modules.module.Module]]=<class\n",
        ">                 'torch.nn.modules.batchnorm.BatchNorm2d'>, activation:Optional\n",
        ">                 [Type[torch.nn.modules.module.Module]]=<class\n",
        ">                 'torch.nn.modules.activation.ReLU'>)\n",
        "\n",
-       "*A 2D convolutional layer with optional batch normalization and activation.\n",
+       "*Base class for all neural network modules.\n",
+       "\n",
+       "Your models should also subclass this class.\n",
+       "\n",
+       "Modules can also contain other Modules, allowing to nest them in\n",
+       "a tree structure. You can assign the submodules as regular attributes::\n",
        "\n",
-       "This layer performs 2D convolution with stride 2 for downsampling, optionally followed by\n",
-       "batch normalization and activation.*\n",
+       "    import torch.nn as nn\n",
+       "    import torch.nn.functional as F\n",
+       "\n",
+       "    class Model(nn.Module):\n",
+       "        def __init__(self) -> None:\n",
+       "            super().__init__()\n",
+       "            self.conv1 = nn.Conv2d(1, 20, 5)\n",
+       "            self.conv2 = nn.Conv2d(20, 20, 5)\n",
+       "\n",
+       "        def forward(self, x):\n",
+       "            x = F.relu(self.conv1(x))\n",
+       "            return F.relu(self.conv2(x))\n",
+       "\n",
+       "Submodules assigned in this way will be registered, and will have their\n",
+       "parameters converted too when you call :meth:`to`, etc.\n",
+       "\n",
+       ".. note::\n",
+       "    As per the example above, an ``__init__()`` call to the parent class\n",
+       "    must be made before assignment on the child.\n",
+       "\n",
+       ":ivar training: Boolean represents whether this module is in training or\n",
+       "                evaluation mode.\n",
+       ":vartype training: bool*\n",
        "\n",
        "|    | **Type** | **Default** | **Details** |\n",
        "| -- | -------- | ----------- | ----------- |\n",
        "| in_channels | int | 3 | input channels |\n",
        "| out_channels | int | 16 | output channels |\n",
        "| kernel_size | int | 3 | kernel size |\n",
        "| stride | int | 2 | stride |\n",
-       "| bias | bool | True | If True, adds a learnable bias to the convolution |\n",
-       "| normalization | Optional | BatchNorm2d | Normalization layer to use after convolution |\n",
-       "| activation | Optional | ReLU | Activation function to use after normalization |"
+       "| bias | bool | False |  |\n",
+       "| normalization | Optional | BatchNorm2d |  |\n",
+       "| activation | Optional | ReLU |  |"
       ],
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/slegroux/nimrod/blob/main/nimrod/models/conv.py#L38){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "### ConvLayer\n",
        "\n",
        ">      ConvLayer (in_channels:int=3, out_channels:int=16, kernel_size:int=3,\n",
-       ">                 stride:int=2, bias:bool=True, normalization:Optional[Type[torc\n",
-       ">                 h.nn.modules.module.Module]]=<class\n",
+       ">                 stride:int=2, bias:bool=False, normalization:Optional[Type[tor\n",
+       ">                 ch.nn.modules.module.Module]]=<class\n",
        ">                 'torch.nn.modules.batchnorm.BatchNorm2d'>, activation:Optional\n",
        ">                 [Type[torch.nn.modules.module.Module]]=<class\n",
        ">                 'torch.nn.modules.activation.ReLU'>)\n",
        "\n",
-       "*A 2D convolutional layer with optional batch normalization and activation.\n",
+       "*Base class for all neural network modules.\n",
+       "\n",
+       "Your models should also subclass this class.\n",
+       "\n",
+       "Modules can also contain other Modules, allowing to nest them in\n",
+       "a tree structure. You can assign the submodules as regular attributes::\n",
        "\n",
-       "This layer performs 2D convolution with stride 2 for downsampling, optionally followed by\n",
-       "batch normalization and activation.*\n",
+       "    import torch.nn as nn\n",
+       "    import torch.nn.functional as F\n",
+       "\n",
+       "    class Model(nn.Module):\n",
+       "        def __init__(self) -> None:\n",
+       "            super().__init__()\n",
+       "            self.conv1 = nn.Conv2d(1, 20, 5)\n",
+       "            self.conv2 = nn.Conv2d(20, 20, 5)\n",
+       "\n",
+       "        def forward(self, x):\n",
+       "            x = F.relu(self.conv1(x))\n",
+       "            return F.relu(self.conv2(x))\n",
+       "\n",
+       "Submodules assigned in this way will be registered, and will have their\n",
+       "parameters converted too when you call :meth:`to`, etc.\n",
+       "\n",
+       ".. note::\n",
+       "    As per the example above, an ``__init__()`` call to the parent class\n",
+       "    must be made before assignment on the child.\n",
+       "\n",
+       ":ivar training: Boolean represents whether this module is in training or\n",
+       "                evaluation mode.\n",
+       ":vartype training: bool*\n",
        "\n",
        "|    | **Type** | **Default** | **Details** |\n",
        "| -- | -------- | ----------- | ----------- |\n",
        "| in_channels | int | 3 | input channels |\n",
        "| out_channels | int | 16 | output channels |\n",
        "| kernel_size | int | 3 | kernel size |\n",
        "| stride | int | 2 | stride |\n",
-       "| bias | bool | True | If True, adds a learnable bias to the convolution |\n",
-       "| normalization | Optional | BatchNorm2d | Normalization layer to use after convolution |\n",
-       "| activation | Optional | ReLU | Activation function to use after normalization |"
+       "| bias | bool | False |  |\n",
+       "| normalization | Optional | BatchNorm2d |  |\n",
+       "| activation | Optional | ReLU |  |"
       ]
      },
      "execution_count": null,
@@ -1135,47 +1196,24 @@
       "==========================================================================================\n",
       "ConvNet                                  [64, 10]                  --\n",
       "├─Sequential: 1-1                        [64, 10]                  --\n",
-      "│    └─ConvLayer: 2-1                    [64, 8, 28, 28]           --\n",
-      "│    │    └─Sequential: 3-1              [64, 8, 28, 28]           --\n",
-      "│    │    │    └─Conv2d: 4-1             [64, 8, 28, 28]           72\n",
-      "│    │    │    └─BatchNorm2d: 4-2        [64, 8, 28, 28]           16\n",
-      "│    │    │    └─ReLU: 4-3               [64, 8, 28, 28]           --\n",
-      "│    └─ConvLayer: 2-2                    [64, 16, 14, 14]          --\n",
-      "│    │    └─Sequential: 3-2              [64, 16, 14, 14]          --\n",
-      "│    │    │    └─Conv2d: 4-4             [64, 16, 14, 14]          1,152\n",
-      "│    │    │    └─BatchNorm2d: 4-5        [64, 16, 14, 14]          32\n",
-      "│    │    │    └─ReLU: 4-6               [64, 16, 14, 14]          --\n",
-      "│    └─ConvLayer: 2-3                    [64, 32, 7, 7]            --\n",
-      "│    │    └─Sequential: 3-3              [64, 32, 7, 7]            --\n",
-      "│    │    │    └─Conv2d: 4-7             [64, 32, 7, 7]            4,608\n",
-      "│    │    │    └─BatchNorm2d: 4-8        [64, 32, 7, 7]            64\n",
-      "│    │    │    └─ReLU: 4-9               [64, 32, 7, 7]            --\n",
-      "│    └─ConvLayer: 2-4                    [64, 64, 4, 4]            --\n",
-      "│    │    └─Sequential: 3-4              [64, 64, 4, 4]            --\n",
-      "│    │    │    └─Conv2d: 4-10            [64, 64, 4, 4]            18,432\n",
-      "│    │    │    └─BatchNorm2d: 4-11       [64, 64, 4, 4]            128\n",
-      "│    │    │    └─ReLU: 4-12              [64, 64, 4, 4]            --\n",
-      "│    └─ConvLayer: 2-5                    [64, 128, 2, 2]           --\n",
-      "│    │    └─Sequential: 3-5              [64, 128, 2, 2]           --\n",
-      "│    │    │    └─Conv2d: 4-13            [64, 128, 2, 2]           73,728\n",
-      "│    │    │    └─BatchNorm2d: 4-14       [64, 128, 2, 2]           256\n",
-      "│    │    │    └─ReLU: 4-15              [64, 128, 2, 2]           --\n",
-      "│    └─ConvLayer: 2-6                    [64, 10, 1, 1]            --\n",
-      "│    │    └─Sequential: 3-6              [64, 10, 1, 1]            --\n",
-      "│    │    │    └─Conv2d: 4-16            [64, 10, 1, 1]            11,520\n",
-      "│    │    │    └─BatchNorm2d: 4-17       [64, 10, 1, 1]            20\n",
-      "│    │    │    └─ReLU: 4-18              [64, 10, 1, 1]            --\n",
-      "│    └─Flatten: 2-7                      [64, 10]                  --\n",
+      "│    └─ConvLayer: 2-1                    [64, 8, 28, 28]           88\n",
+      "│    └─ConvLayer: 2-2                    [64, 16, 14, 14]          1,184\n",
+      "│    └─ConvLayer: 2-3                    [64, 32, 7, 7]            4,672\n",
+      "│    └─ConvLayer: 2-4                    [64, 64, 4, 4]            18,560\n",
+      "│    └─ConvLayer: 2-5                    [64, 128, 2, 2]           73,984\n",
+      "│    └─ConvLayer: 2-6                    [64, 64, 1, 1]            73,856\n",
+      "│    └─ConvLayer: 2-7                    [64, 10, 1, 1]            5,780\n",
+      "│    └─Flatten: 2-8                      [64, 10]                  --\n",
       "==========================================================================================\n",
-      "Total params: 110,028\n",
-      "Trainable params: 110,028\n",
+      "Total params: 178,124\n",
+      "Trainable params: 178,124\n",
       "Non-trainable params: 0\n",
-      "Total mult-adds (Units.MEGABYTES): 71.03\n",
+      "Total mult-adds (Units.MEGABYTES): 75.39\n",
       "==========================================================================================\n",
       "Input size (MB): 0.20\n",
-      "Forward/backward pass size (MB): 12.82\n",
-      "Params size (MB): 0.44\n",
-      "Estimated Total Size (MB): 13.46\n",
+      "Forward/backward pass size (MB): 12.89\n",
+      "Params size (MB): 0.71\n",
+      "Estimated Total Size (MB): 13.80\n",
       "==========================================================================================\n",
       "{'_target_': 'nimrod.models.conv.ConvNet', 'n_features': [1, 8, 16, 32, 64, 128], 'num_classes': 10, 'kernel_size': 3, 'bias': False, 'normalization': {'_target_': 'hydra.utils.get_class', 'path': 'torch.nn.BatchNorm2d'}, 'activation': {'_target_': 'hydra.utils.get_class', 'path': 'torch.nn.ReLU'}}\n"
      ]
@@ -1189,16 +1227,16 @@
     "\n",
     "# model instantiation\n",
     "convnet = ConvNet(\n",
-    "            n_features=[1, 8, 16, 32, 64, 128], # channel/feature expansion\n",
-    "            num_classes=10, # num_classes\n",
-    "            kernel_size=3, # kernel size\n",
-    "            bias=False, # conv2d bias\n",
-    "            normalization=nn.BatchNorm2d, # normalization (before activation)\n",
-    "            activation=nn.ReLU,\n",
+    "    n_features=[1, 8, 16, 32, 64, 128, 64], # channel/feature expansion\n",
+    "    num_classes=10, # num_classes\n",
+    "    kernel_size=3, # kernel size\n",
+    "    bias=False, # conv2d bias\n",
+    "    normalization=nn.BatchNorm2d, # normalization (before activation)\n",
+    "    activation=nn.ReLU,\n",
     ")\n",
     "out = convnet(X)\n",
     "print(out.shape)\n",
-    "print(summary(convnet, input_size=(X.shape), depth=5))\n",
+    "print(summary(convnet, input_size=(X.shape), depth=2))\n",
     "# from config\n",
     "cfg = OmegaConf.load('../config/model/image/convnet.yaml')\n",
     "# print(cfg.defaults)\n",
 
@@ -474,7 +474,7 @@
     "\n",
     "    if model_summary:\n",
     "        xb, yb = next(iter(datamodule.train_dataloader()))\n",
-    "        print(summary(model.nnet, input_size=xb.shape, depth=-1, device='cpu'))\n",
+    "        print(summary(model.nnet, input_size=xb.shape, depth=5, device='cpu'))\n",
     "    \n",
     "    trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())\n",
     "    if test:\n",