fix: LSDV-4825: Apply black formatter to label-studio-converter

HumanSignal · Mar 21, 2023 · commit 83ac08c
1 parent: 317ecb2
Show file tree

Hide file tree

Showing 19 changed files with 1,113 additions and 559 deletions.
diff --git a/label_studio_converter/audio.py b/label_studio_converter/audio.py
@@ -10,7 +10,9 @@
 logger = logging.getLogger(__name__)
 
 
-def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir, upload_dir, download_resources):
+def convert_to_asr_json_manifest(
+    input_data, output_dir, data_key, project_dir, upload_dir, download_resources
+):
     audio_dir_rel = 'audio'
     output_audio_dir = os.path.join(output_dir, audio_dir_rel)
     ensure_dir(output_dir), ensure_dir(output_audio_dir)
@@ -19,13 +21,24 @@ def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir,
         for item in input_data:
             audio_path = item['input'][data_key]
             try:
-                audio_path = download(audio_path, output_audio_dir, project_dir=project_dir, upload_dir=upload_dir,
-                                      return_relative_path=True, download_resources=download_resources)
-                duration = get_audio_duration(os.path.join(output_audio_dir, os.path.basename(audio_path)))
+                audio_path = download(
+                    audio_path,
+                    output_audio_dir,
+                    project_dir=project_dir,
+                    upload_dir=upload_dir,
+                    return_relative_path=True,
+                    download_resources=download_resources,
+                )
+                duration = get_audio_duration(
+                    os.path.join(output_audio_dir, os.path.basename(audio_path))
+                )
             except:
-                logger.info('Unable to download {image_path} or get audio duration. The item {item} will be skipped'.format(
-                    image_path=audio_path, item=item
-                ), exc_info=True)
+                logger.info(
+                    'Unable to download {image_path} or get audio duration. The item {item} will be skipped'.format(
+                        image_path=audio_path, item=item
+                    ),
+                    exc_info=True,
+                )
                 continue
 
             for texts in iter(item['output'].values()):
@@ -37,7 +50,7 @@ def convert_to_asr_json_manifest(input_data, output_dir, data_key, project_dir,
                 'audio_filepath': audio_path,
                 'duration': duration,
                 'text': transcript,
-                'annotator': _get_annotator(item, default='')
+                'annotator': _get_annotator(item, default=''),
             }
             json.dump(metadata, fout)
             fout.write('\n')
diff --git a/label_studio_converter/brush.py b/label_studio_converter/brush.py
@@ -48,39 +48,39 @@ def __init__(self, data):
         self.i = 0
 
     def read(self, size):
-        out = self.data[self.i:self.i + size]
+        out = self.data[self.i : self.i + size]
         self.i += size
         return int(out, 2)
 
 
 def access_bit(data, num):
-    """ from bytes array to bits by num position
-    """
+    """from bytes array to bits by num position"""
     base = int(num // 8)
     shift = 7 - int(num % 8)
     return (data[base] & (1 << shift)) >> shift
 
 
 def bytes2bit(data):
-    """ get bit string from bytes data
-    """
+    """get bit string from bytes data"""
     return ''.join([str(access_bit(data, i)) for i in range(len(data) * 8)])
 
 
 def decode_rle(rle, print_params: bool = False):
-    """ from LS RLE to numpy uint8 3d image [width, height, channel]
-    
+    """from LS RLE to numpy uint8 3d image [width, height, channel]
+
     Args:
         print_params (bool, optional): If true, a RLE parameters print statement is suppressed
     """
     input = InputStream(bytes2bit(rle))
     num = input.read(32)
     word_size = input.read(5) + 1
     rle_sizes = [input.read(4) + 1 for _ in range(4)]
-    
+
     if print_params:
-        print('RLE params:', num, 'values', word_size, 'word_size', rle_sizes, 'rle_sizes')
-
+        print(
+            'RLE params:', num, 'values', word_size, 'word_size', rle_sizes, 'rle_sizes'
+        )
+
     i = 0
     out = np.zeros(num, dtype=np.uint8)
     while i < num:
@@ -99,13 +99,15 @@ def decode_rle(rle, print_params: bool = False):
 
 
 def decode_from_annotation(from_name, results):
-    """ from LS annotation to {"tag_name + label_name": [numpy uint8 image (width x height)]}
-    """
+    """from LS annotation to {"tag_name + label_name": [numpy uint8 image (width x height)]}"""
     layers = {}
     counters = defaultdict(int)
     for result in results:
-        key = 'brushlabels' if result['type'].lower() == 'brushlabels' else \
-            ('labels' if result['type'].lower() == 'labels' else None)
+        key = (
+            'brushlabels'
+            if result['type'].lower() == 'brushlabels'
+            else ('labels' if result['type'].lower() == 'labels' else None)
+        )
         if key is None or 'rle' not in result:
             continue
 
@@ -125,18 +127,36 @@ def decode_from_annotation(from_name, results):
     return layers
 
 
-def save_brush_images_from_annotation(task_id, annotation_id, completed_by,
-                                      from_name, results, out_dir, out_format='numpy'):
+def save_brush_images_from_annotation(
+    task_id,
+    annotation_id,
+    completed_by,
+    from_name,
+    results,
+    out_dir,
+    out_format='numpy',
+):
     layers = decode_from_annotation(from_name, results)
     if isinstance(completed_by, dict):
         email = completed_by.get('email', '')
     else:
         email = str(completed_by)
-    email = "".join(x for x in email if x.isalnum() or x == '@' or x == '.')  # sanitize filename
+    email = "".join(
+        x for x in email if x.isalnum() or x == '@' or x == '.'
+    )  # sanitize filename
 
     for name in layers:
-        filename = os.path.join(out_dir, 'task-' + str(task_id) + '-annotation-' + str(annotation_id)
-                                + '-by-' + email + '-' + name)
+        filename = os.path.join(
+            out_dir,
+            'task-'
+            + str(task_id)
+            + '-annotation-'
+            + str(annotation_id)
+            + '-by-'
+            + email
+            + '-'
+            + name,
+        )
         image = layers[name]
         logger.debug(f'Save image to {filename}')
         if out_format == 'numpy':
@@ -149,16 +169,21 @@ def save_brush_images_from_annotation(task_id, annotation_id, completed_by,
 
 
 def convert_task(item, out_dir, out_format='numpy'):
-    """ Task with multiple annotations to brush images, out_format = numpy | png
-    """
+    """Task with multiple annotations to brush images, out_format = numpy | png"""
     for from_name, results in item['output'].items():
-        save_brush_images_from_annotation(item['id'], item['annotation_id'], item['completed_by'],
-                                          from_name, results, out_dir, out_format)
+        save_brush_images_from_annotation(
+            item['id'],
+            item['annotation_id'],
+            item['completed_by'],
+            from_name,
+            results,
+            out_dir,
+            out_format,
+        )
 
 
 def convert_task_dir(items, out_dir, out_format='numpy'):
-    """ Directory with tasks and annotation to brush images, out_format = numpy | png
-    """
+    """Directory with tasks and annotation to brush images, out_format = numpy | png"""
     for item in items:
         convert_task(item, out_dir, out_format)
 
@@ -170,7 +195,7 @@ def convert_task_dir(items, out_dir, out_format='numpy'):
 
 
 def bits2byte(arr_str, n=8):
-    """ Convert bits back to byte
+    """Convert bits back to byte
 
     :param arr_str:  string with the bit array
     :type arr_str: str
@@ -180,31 +205,31 @@ def bits2byte(arr_str, n=8):
     :type rle: list
     """
     rle = []
-    numbers = [arr_str[i:i + n] for i in range(0, len(arr_str), n)]
+    numbers = [arr_str[i : i + n] for i in range(0, len(arr_str), n)]
     for i in numbers:
         rle.append(int(i, 2))
     return rle
 
 
 # Shamelessly plagiarized from https://stackoverflow.com/a/32681075/6051733
 def base_rle_encode(inarray):
-    """ run length encoding. Partial credit to R rle function.
-        Multi datatype arrays catered for including non Numpy
-        returns: tuple (runlengths, startpositions, values) """
-    ia = np.asarray(inarray)                # force numpy
+    """run length encoding. Partial credit to R rle function.
+    Multi datatype arrays catered for including non Numpy
+    returns: tuple (runlengths, startpositions, values)"""
+    ia = np.asarray(inarray)  # force numpy
     n = len(ia)
     if n == 0:
         return None, None, None
     else:
-        y = ia[1:] != ia[:-1]               # pairwise unequal (string safe)
-        i = np.append(np.where(y), n - 1)   # must include last element posi
-        z = np.diff(np.append(-1, i))       # run lengths
-        p = np.cumsum(np.append(0, z))[:-1] # positions
+        y = ia[1:] != ia[:-1]  # pairwise unequal (string safe)
+        i = np.append(np.where(y), n - 1)  # must include last element posi
+        z = np.diff(np.append(-1, i))  # run lengths
+        p = np.cumsum(np.append(0, z))[:-1]  # positions
         return z, p, ia[i]
 
 
 def encode_rle(arr, wordsize=8, rle_sizes=[3, 4, 8, 16]):
-    """ Encode a 1d array to rle
+    """Encode a 1d array to rle
 
 
     :param arr: flattened np.array from a 4d image (R, G, B, alpha)
@@ -287,17 +312,16 @@ def encode_rle(arr, wordsize=8, rle_sizes=[3, 4, 8, 16]):
 
             # rle size = 16 or longer
             else:
-
                 length_temp = length_reeks
-                while length_temp > 2 ** 16:
+                while length_temp > 2**16:
                     # Starting with a 1 indicates that we have started a series
                     out_str += '1'
 
                     out_str += '11'
                     out_str += f'{2 ** 16 - 1:016b}'
 
                     out_str += f'{value:08b}'
-                    length_temp -= 2 ** 16
+                    length_temp -= 2**16
 
                 # Starting with a 1 indicates that we have started a series
                 out_str += '1'
@@ -328,40 +352,43 @@ def contour2rle(contours, contour_id, img_width, img_height):
     :type img_width: int
     :param img_height: image shape height
     :type img_height: int
-    :return: list of ints in RLE format 
+    :return: list of ints in RLE format
     """
     import cv2  # opencv
+
     mask_im = np.zeros((img_width, img_height, 4))
-    mask_contours = cv2.drawContours(mask_im, contours, contour_id, color=(0, 255, 0, 100), thickness=-1)
+    mask_contours = cv2.drawContours(
+        mask_im, contours, contour_id, color=(0, 255, 0, 100), thickness=-1
+    )
     rle_out = encode_rle(mask_contours.ravel().astype(int))
     return rle_out
 
 
 def mask2rle(mask):
-    """ Convert mask to RLE
-    
+    """Convert mask to RLE
+
     :param mask: uint8 or int np.array mask with len(shape) == 2 like grayscale image
-    :return: list of ints in RLE format 
+    :return: list of ints in RLE format
     """
     assert len(mask.shape) == 2, 'mask must be 2D np.array'
     assert mask.dtype == np.uint8 or mask.dtype == int, 'mask must be uint8 or int'
     array = mask.ravel()
-    array = np.repeat(array, 4)  # must be 4 channels 
+    array = np.repeat(array, 4)  # must be 4 channels
     rle = encode_rle(array)
     return rle
-    
-    
+
+
 def image2rle(path):
-    """ Convert mask image (jpg, png) to RLE
+    """Convert mask image (jpg, png) to RLE
 
     1. Read image as grayscale
     2. Flatten to 1d array
     3. Threshold > 128
     4. Encode
-    
-    :param path: path to image with mask (jpg, png), this image will be thresholded with values > 128 to obtain mask, 
+
+    :param path: path to image with mask (jpg, png), this image will be thresholded with values > 128 to obtain mask,
                  so you can mark background as black and foreground as white
-    :return: list of ints in RLE format 
+    :return: list of ints in RLE format
     """
     with Image.open(path).convert('L') as image:
         mask = np.array((np.array(image) > 128) * 255, dtype=np.uint8)
@@ -371,18 +398,26 @@ def image2rle(path):
         return rle, image.size[0], image.size[1]
 
 
-def image2annotation(path, label_name, from_name, to_name, ground_truth=False, model_version=None, score=None):
-    """ Convert image with mask to brush RLE annotation
+def image2annotation(
+    path,
+    label_name,
+    from_name,
+    to_name,
+    ground_truth=False,
+    model_version=None,
+    score=None,
+):
+    """Convert image with mask to brush RLE annotation
 
-    :param path: path to image with mask (jpg, png), this image will be thresholded with values > 128 to obtain mask, 
+    :param path: path to image with mask (jpg, png), this image will be thresholded with values > 128 to obtain mask,
                  so you can mark background as black and foreground as white
     :param label_name: label name from labeling config (<Label>)
     :param from_name: brush tag name (<BrushLabels>)
     :param to_name: image tag name (<Image>)
     :param ground_truth: ground truth annotation true/false
     :param model_version: any string, only for predictions
     :param score: model score as float, only for predictions
-    
+
     :return: dict with Label Studio Annotation or Prediction (Pre-annotation)
     """
     rle, width, height = image2rle(path)
@@ -391,19 +426,13 @@ def image2annotation(path, label_name, from_name, to_name, ground_truth=False, m
             {
                 "id": str(uuid.uuid4())[0:8],
                 "type": "brushlabels",
-                "value": {
-                    "rle": rle,
-                    "format": "rle",
-                    "brushlabels": [
-                        label_name
-                    ]
-                },
+                "value": {"rle": rle, "format": "rle", "brushlabels": [label_name]},
                 "origin": "manual",
                 "to_name": to_name,
                 "from_name": from_name,
                 "image_rotation": 0,
                 "original_width": width,
-                "original_height": height
+                "original_height": height,
             }
         ],
     }