Merge pull request #365 from kwcckw/dev

Added support of mask, keypoints and bounding boxes in augmentations.
sparkfish · Sep 12, 2023 · ee85100 · ee85100
2 parents 24219e4 + b71cef8
commit ee85100
Show file tree

Hide file tree

Showing 16 changed files with 245 additions and 25 deletions.
diff --git a/augraphy/augmentations/hollow.py b/augraphy/augmentations/hollow.py
@@ -264,4 +264,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if is_gray:
                 image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2GRAY)
 
-            return image_output
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image_output] + outputs_extra
+            else:
+                return image_output
diff --git a/augraphy/augmentations/inkbleed.py b/augraphy/augmentations/inkbleed.py
@@ -89,4 +89,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if has_alpha:
                 image_output = np.dstack((image_output, image_alpha))
 
-            return image_output
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image_output] + outputs_extra
+            else:
+                return image_output
diff --git a/augraphy/augmentations/inkcolorswap.py b/augraphy/augmentations/inkcolorswap.py
@@ -213,4 +213,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if is_gray:
                 image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
-            return image
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
diff --git a/augraphy/augmentations/inkmottling.py b/augraphy/augmentations/inkmottling.py
@@ -103,4 +103,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if is_gray:
                 image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
-            return image
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
diff --git a/augraphy/augmentations/inkshifter.py b/augraphy/augmentations/inkshifter.py
@@ -207,4 +207,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                     text_shift_factor * noisemap_y,
                 )
 
-            return disp_img
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [disp_img] + outputs_extra
+            else:
+                return disp_img
diff --git a/augraphy/augmentations/jpeg.py b/augraphy/augmentations/jpeg.py
@@ -49,4 +49,16 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if has_alpha:
                 image = np.dstack((image, image_alpha))
 
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
+
             return image
diff --git a/augraphy/augmentations/letterpress.py b/augraphy/augmentations/letterpress.py
@@ -125,4 +125,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             indices = image < value_threshold
             image[indices] = noise_mask[indices]
 
-            return image
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
diff --git a/augraphy/augmentations/lightinggradient.py b/augraphy/augmentations/lightinggradient.py
@@ -220,7 +220,7 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                 bgr = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
                 hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
 
-            mask = self.generate_parallel_light_mask(
+            lighting_mask = self.generate_parallel_light_mask(
                 mask_size=(width, height),
                 position=self.light_position,
                 direction=self.direction,
@@ -229,12 +229,22 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                 mode=self.mode,
                 linear_decay_rate=self.linear_decay_rate,
             )
-            hsv[:, :, 2] = hsv[:, :, 2] * transparency + mask * (1 - transparency)
+            hsv[:, :, 2] = hsv[:, :, 2] * transparency + lighting_mask * (1 - transparency)
             frame = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
             frame[frame > 255] = 255
             frame = np.asarray(frame, dtype=np.uint8)
 
             if has_alpha:
                 frame = np.dstack((frame, image_alpha))
 
-            return frame
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [frame] + outputs_extra
+            else:
+                return frame
diff --git a/augraphy/augmentations/linesdegradation.py b/augraphy/augmentations/linesdegradation.py
@@ -177,12 +177,12 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
 
             # merge mask and set max value = 1
             if gradient_direction == 2:
-                mask = mask_x + mask_y
+                mask_xy = mask_x + mask_y
             elif gradient_direction == 1:
-                mask = mask_y
+                mask_xy = mask_y
             else:
-                mask = mask_x
-            mask[mask > 0] = 1
+                mask_xy = mask_x
+            mask_xy[mask_xy > 0] = 1
 
             # output image
             image_output = image.copy()
@@ -198,8 +198,18 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if len(image_output.shape) > 2:
                 # skip alpha layer
                 for i in range(3):
-                    image_output[ystart:yend, xstart:xend, i][mask > 0] = replacement_mask[mask > 0]
+                    image_output[ystart:yend, xstart:xend, i][mask_xy > 0] = replacement_mask[mask_xy > 0]
             else:
-                image_output[ystart:yend, xstart:xend][mask > 0] = replacement_mask[mask > 0]
+                image_output[ystart:yend, xstart:xend][mask_xy > 0] = replacement_mask[mask_xy > 0]
 
-            return image_output
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image_output] + outputs_extra
+            else:
+                return image_output
diff --git a/augraphy/augmentations/lowinkperiodiclines.py b/augraphy/augmentations/lowinkperiodiclines.py
@@ -100,4 +100,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             for i in range(count):
                 self.add_periodic_transparency_lines(image, count, period)
 
-            return image
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
diff --git a/augraphy/augmentations/lowinkrandomlines.py b/augraphy/augmentations/lowinkrandomlines.py
@@ -50,4 +50,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                         random.randint(1, image.shape[0] - 1),
                     )
 
-            return image
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image] + outputs_extra
+            else:
+                return image
diff --git a/augraphy/augmentations/lowlightnoise.py b/augraphy/augmentations/lowlightnoise.py
@@ -184,4 +184,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
         if has_alpha:
             result = np.dstack((result, image_alpha))
 
-        return result
+        # check for additional output of mask, keypoints and bounding boxes
+        outputs_extra = []
+        if mask is not None or keypoints is not None or bounding_boxes is not None:
+            outputs_extra = [mask, keypoints, bounding_boxes]
+
+        # returns additional mask, keypoints and bounding boxes if there is additional input
+        if outputs_extra:
+            # returns in the format of [image, mask, keypoints, bounding_boxes]
+            return [result] + outputs_extra
+        else:
+            return result
diff --git a/augraphy/augmentations/markup.py b/augraphy/augmentations/markup.py
@@ -408,4 +408,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
         if has_alpha:
             markup_image = np.dstack((markup_image, image_alpha))
 
-        return markup_image
+        # check for additional output of mask, keypoints and bounding boxes
+        outputs_extra = []
+        if mask is not None or keypoints is not None or bounding_boxes is not None:
+            outputs_extra = [mask, keypoints, bounding_boxes]
+
+        # returns additional mask, keypoints and bounding boxes if there is additional input
+        if outputs_extra:
+            # returns in the format of [image, mask, keypoints, bounding_boxes]
+            return [markup_image] + outputs_extra
+        else:
+            return markup_image
diff --git a/augraphy/augmentations/noisetexturize.py b/augraphy/augmentations/noisetexturize.py
@@ -116,4 +116,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if has_alpha:
                 cut = np.dstack((cut, image_alpha))
 
-            return cut
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [cut] + outputs_extra
+            else:
+                return cut
diff --git a/augraphy/augmentations/noisylines.py b/augraphy/augmentations/noisylines.py
@@ -194,4 +194,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
             if has_alpha:
                 image_output = np.dstack((image_output, image_alpha))
 
-            return image_output
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image_output] + outputs_extra
+            else:
+                return image_output
diff --git a/augraphy/augmentations/pageborder.py b/augraphy/augmentations/pageborder.py
@@ -234,6 +234,9 @@ def create_page_borders(
         image,
         page_border_width,
         page_border_height,
+        mask,
+        keypoints,
+        bounding_boxes,
     ):
         """Create page borders effect and apply it into input image.
 
@@ -243,6 +246,13 @@ def create_page_borders(
         :type border_width: int
         :param border_height: Vertical direction and height of borders.
         :type border_height: int
+        :param mask: The mask of labels for each pixel. Mask value should be in range of 1 to 255.
+            Value of 0 will be assigned to the filled area after the transformation.
+        :type mask: numpy array (uint8)
+        :param keypoints: A dictionary of single or multiple labels where each label is a nested list of point coordinates.
+        :type keypoints: dictionary
+        :param bounding_boxes: A nested list where each nested list contains box location (x1, y1, x2, y2).
+        :type bounding_boxes: list
         """
 
         border_width = abs(page_border_width)
@@ -551,6 +561,51 @@ def create_page_borders(
                             end_x = bxsize
                         border_image_merged = border_image_merged[:, start_x:end_x]
 
+        # not same page border: pad the mask and offset keypoints / bounding boxes for the added border
+        else:
+            if mask is not None:
+                pad_x = [0, 0]
+                pad_y = [0, 0]
+                if page_border_width > 0:
+                    pad_x = [0, page_border_width]
+                elif page_border_width < 0:
+                    pad_x = [abs(page_border_width), 0]
+                if page_border_height > 0:
+                    pad_y = [0, page_border_height]
+                elif page_border_height < 0:
+                    pad_y = [abs(page_border_height), 0]
+                # pad mask based on the added page border size
+                mask = np.pad(
+                    mask,
+                    pad_width=(pad_y, pad_x),
+                    mode="constant",
+                    constant_values=0,
+                )
+
+            if keypoints is not None:
+                offset_x = 0
+                offset_y = 0
+                if page_border_width < 0:
+                    offset_x = page_border_width
+                if page_border_height < 0:
+                    offset_x = page_border_width
+                # offset every keypoint by the padded length; NOTE(review): the height check above assigns offset_x, so offset_y stays 0 - confirm
+                for name, points in keypoints.items():
+                    for i, (xpoint, ypoint) in enumerate(points):
+                        points[i] = [xpoint + offset_x, ypoint + offset_y]
+
+            if bounding_boxes is not None:
+                offset_x = 0
+                offset_y = 0
+                if page_border_width < 0:
+                    offset_x = page_border_width
+                if page_border_height < 0:
+                    offset_x = page_border_width
+                # offset every box corner by the padded length; NOTE(review): the height check above assigns offset_x, so offset_y stays 0 - confirm
+                for i, bounding_box in enumerate(bounding_boxes):
+                    xspoint, yspoint, xepoint, yepoint = bounding_box
+                    bounding_boxes[i] = [xspoint + offset_x, yspoint + offset_y, xepoint + offset_x, yepoint + offset_y]
+
         # rotate back to original position
         # default, extend top left
         if page_border_width < 0 and page_border_height < 0:
@@ -582,7 +637,7 @@ def create_page_borders(
             # rotate counter clockwise 2 times  from left (left is reference) back to right
             border_image_merged = np.rot90(border_image_merged, 2)
 
-        return border_image_merged
+        return border_image_merged, mask
 
     def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=None, force=False):
         if force or self.should_run():
@@ -624,10 +679,13 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                 else:
                     border_height = self.page_border_width_height[1]
 
-            image_output = self.create_page_borders(
+            image_output, mask = self.create_page_borders(
                 image.copy(),
                 border_width,
                 border_height,
+                mask,
+                keypoints,
+                bounding_boxes,
             )
 
             # return image follows the input image color channel
@@ -644,4 +702,14 @@ def __call__(self, image, layer=None, mask=None, keypoints=None, bounding_boxes=
                     )
                 image_output = np.dstack((image_output, image_alpha))
 
-            return image_output
+            # check for additional output of mask, keypoints and bounding boxes
+            outputs_extra = []
+            if mask is not None or keypoints is not None or bounding_boxes is not None:
+                outputs_extra = [mask, keypoints, bounding_boxes]
+
+            # returns additional mask, keypoints and bounding boxes if there is additional input
+            if outputs_extra:
+                # returns in the format of [image, mask, keypoints, bounding_boxes]
+                return [image_output] + outputs_extra
+            else:
+                return image_output