Merge pull request #24 from ChanLumerico/mnv3
MNv3 Preparation
ChanLumerico authored Aug 31, 2024
2 parents c625163 + 828785b commit 7aabbb2
Showing 23 changed files with 639 additions and 125 deletions.
Binary file added img/logo/plain/luma.png
Binary file added img/logo/shadow/luma_shadow.png
Binary file added img/logo/shadow/luma_shadow_small.png
Binary file added img/logo/shadow/luma_shadow_tiny.png
12 changes: 8 additions & 4 deletions luma/__import__.py
@@ -138,7 +138,9 @@
SeparableConv2D,
SeparableConv3D,
DenseBlock,
SEBlock,
SEBlock1D,
SEBlock2D,
SEBlock3D,
IncepBlock,
IncepResBlock,
ResNetBlock,
@@ -173,7 +175,8 @@
XceptionNet,
MobileNet_V1,
MobileNet_V2,
MobileNet_V3,
MobileNet_V3_Small,
MobileNet_V3_Large,
)
from luma.neural.autoprop import LayerNode, LayerGraph

@@ -352,7 +355,7 @@

ConvBlock1D, ConvBlock2D, ConvBlock3D,
SeparableConv1D, SeparableConv2D, SeparableConv3D,
DenseBlock, SEBlock,
DenseBlock, SEBlock1D, SEBlock2D, SEBlock3D,
IncepBlock, IncepResBlock, ResNetBlock, XceptionBlock,
MobileNetBlock

@@ -374,7 +377,8 @@
InceptionResNet_V1, InceptionResNet_V2, XceptionNet,
ResNet_18, ResNet_34, ResNet_50, ResNet_101, ResNet_152,
ResNet_200, ResNet_1001,
MobileNet_V1, MobileNet_V2, MobileNet_V3
MobileNet_V1, MobileNet_V2, MobileNet_V3_Small,
MobileNet_V3_Large,

# ------------------- [ luma.metric ] ----------------------
Accuracy, Precision, Recall, F1Score, Specificity
4 changes: 2 additions & 2 deletions luma/interface/typing.py
@@ -177,8 +177,8 @@ def wrapper(self, *args: Any, **kwargs: Any) -> Any:
)
if tensor.ndim != n_dim:
raise ValueError(
f"'{param_name}' must be {n_dim}D-tensor",
+f" got {tensor.ndim}D-tensor.",
f"'{param_name}' must be {n_dim}D-tensor,"
+ f" got {tensor.ndim}D-tensor.",
)

return func(self, *args, **kwargs)
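The hunk above only reworks the ValueError message so the two f-strings are joined into a single string (the comma now sits inside the first literal), but it lives inside a pattern worth illustrating: a decorator that checks the dimensionality of a tensor argument before calling the wrapped method. A minimal, self-contained sketch of that pattern in plain NumPy; the `force_ndim` name and signature below are illustrative stand-ins, not luma's actual `Tensor`/typing machinery:

```python
import functools
from typing import Any, Callable

import numpy as np


def force_ndim(param_name: str, n_dim: int) -> Callable:
    """Reject calls whose tensor argument does not have exactly `n_dim` dimensions."""

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(self, tensor: np.ndarray, *args: Any, **kwargs: Any) -> Any:
            if tensor.ndim != n_dim:
                raise ValueError(
                    f"'{param_name}' must be {n_dim}D-tensor,"
                    + f" got {tensor.ndim}D-tensor."
                )
            return func(self, tensor, *args, **kwargs)

        return wrapper

    return decorator


class Layer:
    @force_ndim("X", n_dim=4)
    def forward(self, X: np.ndarray) -> np.ndarray:
        return X  # placeholder for the real computation


Layer().forward(np.zeros((2, 3, 8, 8)))   # OK: a 4D tensor
# Layer().forward(np.zeros((2, 3, 8)))    # ValueError: 'X' must be 4D-tensor, got 3D-tensor.
```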
29 changes: 28 additions & 1 deletion luma/neural/README.md
@@ -69,6 +69,8 @@ Deep learning models and neural network utilities of Luma

*luma.neural.block 🔗*

### Standard Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `ConvBlock1D` | 2~3 | $(N,C_{in},W_{in})$ | $(N,C_{out},W_{out})$ |
@@ -78,6 +80,11 @@ Deep learning models and neural network utilities of Luma
| `SeparableConv2D` | 3~5 | $(N,C_{in},H_{in}, W_{in})$ | $(N,C_{out},H_{out}, W_{out})$ |
| `SeparableConv3D` | 3~5 | $(N,C_{in},D_{in},H_{in},W_{in})$ | $(N,C_{out},D_{out},H_{out},W_{out})$ |
| `DenseBlock` | 2~3 | $(N,L_{in})$ | $(N,L_{out})$ |

### Inception Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `IncepBlock.V1` | 19 | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
| `IncepBlock.V2_TypeA` | 22 | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
| `IncepBlock.V2_TypeB` | 31 | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
@@ -89,7 +96,11 @@ Deep learning models and neural network utilities of Luma
| `IncepBlock.V4_TypeC` | 33 | $(N,1536,8,8)$ | $(N,1536,8,8)$ |
| `IncepBlock.V4_ReduxA` | 15 | $(N,384,35,35)$ | $(N,1024,17,17)$ |
| `IncepBlock.V4_ReduxB` | 21 | $(N,1024,17,17)$ | $(N,1536,8,8)$ |
| `IncepResBlock.V1_Stem` | 17 | $(N,3,299,299)$ | $(N,256,35,35)$ |

### Inception-Res Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `IncepResBlock.V1_TypeA` | 22 | $(N,256,35,35)$ | $(N,256,35,35)$ |
| `IncepResBlock.V1_TypeB` | 16 | $(N,896,17,17)$ | $(N,896,17,17)$ |
| `IncepResBlock.V1_TypeC` | 16 | $(N,1792,8,8)$ | $(N,1792,8,8)$ |
@@ -98,13 +109,29 @@ Deep learning models and neural network utilities of Luma
| `IncepResBlock.V2_TypeB` | 16 | $(N,1280,17,17)$ | $(N,1280,17,17)$ |
| `IncepResBlock.V2_TypeC` | 16 | $(N,2272,8,8)$ | $(N,2272,8,8)$ |
| `IncepResBlock.V2_Redux` | 24 | $(N,1280,17,17)$ | $(N,2272,8,8)$ |

### ResNet Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `ResNetBlock.Basic` | 7~ | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
| `ResNetBlock.Bottleneck` | 10~ | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
| `ResNetBlock.PreActBottleneck` | 10~ | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |

### Xception Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `XceptionBlock.Entry` | 42 | $(N,3,299,299)$ | $(N,728,19,19)$ |
| `XceptionBlock.Middle` | 14 | $(N,728,19,19)$ | $(N,728,19,19)$ |
| `XceptionBlock.Exit` | 11 | $(N,728,19,19)$ | $(N,1024,9,9)$ |

### MobileNet Blocks

| Class | # of Layers | Input Shape | Output Shape |
| --- | --- | --- | --- |
| `MobileNetBlock.InvertedRes` | 6~9 | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |
| `MobileNetBlock.InvertedRes_SE` | 14~17 | $(N,C_{in},H_{in},W_{in})$ | $(N,C_{out},H_{out},W_{out})$ |

---

1 change: 1 addition & 0 deletions luma/neural/autoprop/__init__.py
@@ -6,3 +6,4 @@
"""

from .graph import LayerNode, LayerGraph
from .merge import MergeMode
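
`MergeMode` is now re-exported from `luma.neural.autoprop`, and the `incep_res_v1.py` hunks further down replace the string arguments `"sum"` and `"chcat"` with `MergeMode.SUM` and `MergeMode.CHCAT`. A rough sketch of the two merge behaviors those names suggest, element-wise summation for residual joins versus channel-axis concatenation for inception-style branches; the enum values and the `merge` helper below are assumptions for illustration, not the actual `autoprop.merge` module:

```python
from enum import Enum, auto
from typing import List

import numpy as np


class MergeMode(Enum):
    # Member names taken from this PR's usage (MergeMode.SUM, MergeMode.CHCAT);
    # the real luma.neural.autoprop.merge implementation may differ.
    SUM = auto()     # element-wise sum of branch outputs (residual-style join)
    CHCAT = auto()   # concatenation along the channel axis (inception-style join)


def merge(outputs: List[np.ndarray], mode: MergeMode) -> np.ndarray:
    if mode is MergeMode.SUM:
        # All branches must produce identical shapes, e.g. (N, C, H, W).
        return np.sum(outputs, axis=0)
    if mode is MergeMode.CHCAT:
        # Branches share (N, H, W) but may differ in channel count.
        return np.concatenate(outputs, axis=1)
    raise ValueError(f"Unsupported merge mode: {mode}")


a = np.ones((2, 96, 17, 17))
b = np.ones((2, 128, 17, 17))
print(merge([a, a], MergeMode.SUM).shape)    # (2, 96, 17, 17)
print(merge([a, b], MergeMode.CHCAT).shape)  # (2, 224, 17, 17)
```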
103 changes: 95 additions & 8 deletions luma/neural/block/__init__.py
@@ -24,6 +24,7 @@
incep_res_v2,
mobile,
resnet,
se,
standard,
xception,
)
@@ -37,7 +38,9 @@
"SeparableConv2D",
"SeparableConv3D",
"DenseBlock",
"SEBlock",
"SEBlock1D",
"SEBlock2D",
"SEBlock3D",
"IncepBlock",
"IncepResBlock",
"ResNetBlock",
@@ -84,7 +87,7 @@ class ConvBlock1D(standard._ConvBlock1D):
Number of output channels
`filter_size`: tuple of int or int
Size of each filter
`activation` : FuncType
`activation` : callable
Type of activation function
`padding` : tuple of int or int or {"same", "valid"}, default="same"
Padding method
@@ -139,7 +142,7 @@ class ConvBlock2D(standard._ConvBlock2D):
Number of output channels
`filter_size`: tuple of int or int
Size of each filter
`activation` : FuncType
`activation` : callable
Type of activation function
`padding` : tuple of int or int or {"same", "valid"}, default="same"
Padding method
@@ -194,7 +197,7 @@ class ConvBlock3D(standard._ConvBlock3D):
Number of output channels
`filter_size`: tuple of int or int
Size of each filter
`activation` : FuncType
`activation` : callable
Type of activation function
`padding` : tuple of int or int or {"same", "valid"}, default="same"
Padding method
@@ -389,7 +392,7 @@ class DenseBlock(standard._DenseBlock):
Number of input features
`out_features` : int
Number of output features
`activation` : FuncType
`activation` : callable
Type of activation function
`optimizer` : Optimizer, optional, default=None
Type of optimizer for weight update
@@ -409,9 +412,75 @@
"""


class SEBlock(standard._SEBlock):
class SEBlock1D(se._SEBlock1D):
"""
The SEBlock (Squeeze-and-Excitation Block) enhances the representational
Squeeze-and-Excitation (SE) block for 1-dimensional data.
The SE-Block enhances the representational
power of a network by recalibrating channel-wise feature responses. It
first squeezes the spatial dimensions using global average pooling, then
excites the channels with learned weights through fully connected layers
and an activation function. This selectively emphasizes important channels
while suppressing less relevant ones.
Parameters
----------
`in_channels` : int
Number of input channels
`reduction`: int, default=4
Reducing factor of the 'Squeeze' phase.
`activation` : callable, default=Activation.HardSwish
Type of activation function
`optimizer` : Optimizer, optional, default=None
Type of optimizer for weight update
`initializer` : InitStr, default=None
Type of weight initializer
`lambda_` : float, default=0.0
L2 regularization strength
`keep_shape` : bool, default=True
Whether to maintain the original shape of the input;
Transforms 3D-Tensor to 2D-Matrix if set to False.
"""


class SEBlock2D(se._SEBlock2D):
"""
Squeeze-and-Excitation (SE) block for 2-dimensional data.
The SE-Block enhances the representational
power of a network by recalibrating channel-wise feature responses. It
first squeezes the spatial dimensions using global average pooling, then
excites the channels with learned weights through fully connected layers
and an activation function. This selectively emphasizes important channels
while suppressing less relevant ones.
Parameters
----------
`in_channels` : int
Number of input channels
`reduction`: int, default=4
Reducing factor of the 'Squeeze' phase.
`activation` : callable, default=Activation.HardSwish
Type of activation function
`optimizer` : Optimizer, optional, default=None
Type of optimizer for weight update
`initializer` : InitStr, default=None
Type of weight initializer
`lambda_` : float, default=0.0
L2 regularization strength
`keep_shape` : bool, default=True
Whether to maintain the original shape of the input;
Transforms 4D-Tensor to 2D-Matrix if set to False.
"""


class SEBlock3D(se._SEBlock3D):
"""
Squeeze-and-Excitation (SE) block for 3-dimensional data.
The SE-Block enhances the representational
power of a network by recalibrating channel-wise feature responses. It
first squeezes the spatial dimensions using global average pooling, then
excites the channels with learned weights through fully connected layers
@@ -422,14 +491,19 @@ class SEBlock(standard._SEBlock):
----------
`in_channels` : int
Number of input channels
`activation` : FuncType
`reduction`: int, default=4
Reducing factor of the 'Squeeze' phase.
`activation` : callable, default=Activation.HardSwish
Type of activation function
`optimizer` : Optimizer, optional, default=None
Type of optimizer for weight update
`initializer` : InitStr, default=None
Type of weight initializer
`lambda_` : float, default=0.0
L2 regularization strength
`keep_shape` : bool, default=True
Whether to maintain the original shape of the input;
Transforms 5D-Tensor to 2D-Matrix if set to False.
"""

@@ -906,6 +980,11 @@ class MobileNetBlock:
Convolutional Neural Networks for Mobile Vision Applications.”
arXiv, 17 Apr. 2017, arxiv.org/abs/1704.04861.
`MobileNet V3` :
[2] Howard, Andrew, et al. "Searching for MobileNetV3." Proceedings
of the IEEE/CVF International Conference on Computer Vision, 2019,
doi:10.1109/ICCV.2019.00140.
"""

class InvertedRes(mobile._InvertedRes):
@@ -915,3 +994,11 @@ class InvertedRes(mobile._InvertedRes):
Refer to the figures shown in the original paper[1].
"""

class InvertedRes_SE(mobile._InvertedRes_SE):
"""
Inverted Residual Block with depth-wise and point-wise
convolutions and an SE-Block attached, as used in MobileNet V3.
Refer to the figures shown in the original paper[2].
"""
16 changes: 8 additions & 8 deletions luma/neural/block/incep_res_v1.py
@@ -5,7 +5,7 @@
from luma.interface.util import InitUtil

from luma.neural.layer import *
from luma.neural.autoprop import LayerNode, LayerGraph
from luma.neural.autoprop import LayerNode, LayerGraph, MergeMode


class _IncepRes_V1_Stem(Sequential):
@@ -120,7 +120,7 @@ def init_nodes(self) -> None:
self.rt_ = LayerNode(Identity(), name="rt_")
self.res_sum = LayerNode(
Sequential(Identity(), self.activation()),
merge_mode="sum",
MergeMode.SUM,
name="res_sum",
)

@@ -163,7 +163,7 @@ def init_nodes(self) -> None:
Conv2D(96, 256, 1, 1, "same", **self.basic_args),
BatchNorm2D(256, self.momentum),
),
merge_mode="chcat",
MergeMode.CHCAT,
name="br_cat",
)

@@ -225,7 +225,7 @@ def init_nodes(self) -> None:
self.rt_ = LayerNode(Identity(), name="rt_")
self.res_sum = LayerNode(
Sequential(Identity(), self.activation()),
merge_mode="sum",
MergeMode.SUM,
name="res_sum",
)

@@ -257,7 +257,7 @@ def init_nodes(self) -> None:
Conv2D(128, 896, 1, 1, "same", **self.basic_args),
BatchNorm2D(896, self.momentum),
),
merge_mode="chcat",
MergeMode.CHCAT,
name="br_cat",
)

@@ -319,7 +319,7 @@ def init_nodes(self) -> None:
self.rt_ = LayerNode(Identity(), name="rt_")
self.res_sum = LayerNode(
Sequential(Identity(), self.activation()),
merge_mode="sum",
MergeMode.SUM,
name="res_sum",
)

@@ -351,7 +351,7 @@ def init_nodes(self) -> None:
Conv2D(384, 1792, 1, 1, "same", **self.basic_args),
BatchNorm2D(192, self.momentum),
),
merge_mode="chcat",
MergeMode.CHCAT,
name="br_cat",
)

@@ -451,7 +451,7 @@ def init_nodes(self) -> None:
name="br_d",
)

self.cat_ = LayerNode(Identity(), merge_mode="chcat", name="cat_")
self.cat_ = LayerNode(Identity(), MergeMode.CHCAT, name="cat_")

@Tensor.force_shape((-1, 896, 17, 17))
def forward(self, X: TensorLike, is_train: bool = False) -> TensorLike: