Skip to content
This repository has been archived by the owner on Aug 21, 2024. It is now read-only.

feat: Import yolo polygons. Includes conversion from bboxes to polygons if user is wishing to transition dataset. #257

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
tmp/

# Translations
*.mo
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ optional arguments:
--from-name FROM_NAME
control tag name from Label Studio labeling config
--out-type OUT_TYPE annotation type - "annotations" or "predictions"
--yolo-type YOLO_TYPE label type - "rectanglelabels" or "polygonlabels"
--image-root-url IMAGE_ROOT_URL
root URL path where images will be hosted, e.g.:
http://example.com/images or s3://my-bucket
Expand Down
39 changes: 30 additions & 9 deletions label_studio_converter/imports/label_config.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,56 @@
from label_studio_converter.imports.colors import COLORS
import logging

from label_studio_converter.imports.colors import COLORS

LABELS = """
<{# TAG_NAME #} name="{# FROM_NAME #}" toName="image">
{# LABELS #} </{# TAG_NAME #}>
"""
POLY_LABELS = """
<{# TAG_NAME #} name="{# FROM_NAME #}" toName="image"
strokeWidth="{# STROKE #}" pointSize="{# POINT #}"
opacity="{# OPACITY #}">
{# LABELS #} </{# TAG_NAME #}>
"""

LABELING_CONFIG = """<View>
<Image name="{# TO_NAME #}" value="$image"/>
{# BODY #}</View>
"""

logger = logging.getLogger('root')

def generate_label_config(
categories, tags, to_name='image', from_name='label', filename=None
):
logger.info(f'Creating your label configuration file with {tags[from_name]}')
labels = ''
for key in sorted(categories.keys()):
color = COLORS[int(key) % len(COLORS)]
label = f' <Label value="{categories[key]}" background="rgba({color[0]}, {color[1]}, {color[2]}, 1)"/>\n'
labels += label

body = ''
for from_name in tags:
tag_body = (
str(LABELS)
.replace('{# TAG_NAME #}', tags[from_name])
.replace('{# LABELS #}', labels)
.replace('{# TO_NAME #}', to_name)
.replace('{# FROM_NAME #}', from_name)
)
for from_name in [tag_key for tag_key in tags.keys() if type(tags[tag_key]) == str]:
if tags[from_name] == 'PolygonLabels':
tag_body = (
str(POLY_LABELS)
.replace('{# TAG_NAME #}', tags[from_name])
.replace('{# LABELS #}', labels)
.replace('{# TO_NAME #}', to_name)
.replace('{# FROM_NAME #}', from_name)
.replace('{# STROKE #}', tags['poly_ops']['stroke'])
.replace('{# POINT #}', tags['poly_ops']['pointSize'])
.replace('{# OPACITY #}', tags['poly_ops']['opacity'])
)
else:
tag_body = (
str(LABELS)
.replace('{# TAG_NAME #}', tags[from_name])
.replace('{# LABELS #}', labels)
.replace('{# TO_NAME #}', to_name)
.replace('{# FROM_NAME #}', from_name)
)
body += f'\n <Header value="{tags[from_name]}"/>' + tag_body

config = (
Expand Down
156 changes: 135 additions & 21 deletions label_studio_converter/imports/yolo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import shutil
from pathlib import Path
import json # better to use "imports ujson as json" for the best performance

import uuid
Expand All @@ -15,6 +17,23 @@

logger = logging.getLogger('root')

def get_data(input_dir, img_exts):
    """Collect image and label file paths from the sub-directories of a dataset.

    Files that sit directly in ``input_dir`` are ignored; data is expected to
    live at least one directory level below it (e.g. ``images/``, ``labels/``
    or ``images/train/``).

    :param input_dir: dataset root directory to walk recursively
    :param img_exts: image extensions to look for, either an iterable of
        suffixes or a single comma separated string such as ".jpg,.jpeg,.png"
    :return: tuple ``(images, labels, image_labels)`` where ``images`` and
        ``labels`` are lists of paths and ``image_labels`` maps an image path
        to the label path that has the same file stem
    """
    # A raw comma separated string must be split first; iterating over it
    # directly would test single characters and match unrelated files.
    if isinstance(img_exts, str):
        img_exts = img_exts.split(',')
    # Empty suffixes are dropped because str.endswith('') is always true.
    suffixes = tuple(ext.strip() for ext in img_exts if ext.strip())

    images, labels = [], []
    for dir_pth, dir_names, files in os.walk(input_dir):
        dir_names.sort()  # deterministic traversal order
        if Path(dir_pth) == Path(input_dir):
            continue  # skip input_dir. data should be at least one level in from input_dir
        for fn in sorted(files):
            if suffixes and fn.endswith(suffixes):
                images.append(f'{dir_pth}/{fn}')
            if fn.endswith('.txt') and 'classes.txt' not in fn:
                labels.append(f'{dir_pth}/{fn}')

    # Pair images and labels by file stem instead of by list position: the
    # two lists are not guaranteed to have the same order or the same length.
    # If several labels share a stem, the first one found is used.
    label_by_stem = {}
    for label in labels:
        label_by_stem.setdefault(Path(label).stem, label)

    image_labels = {}
    for image in images:
        label = label_by_stem.get(Path(image).stem)
        if label is not None:
            image_labels[image] = label
    return images, labels, image_labels

def convert_yolo_to_ls(
input_dir,
Expand All @@ -24,6 +43,7 @@ def convert_yolo_to_ls(
out_type="annotations",
image_root_url='/data/local-files/?d=',
image_ext='.jpg,.jpeg,.png',
yolo_type="rectanglelabels",
image_dims: Optional[Tuple[int, int]] = None,
):
"""Convert YOLO labeling to Label Studio JSON
Expand All @@ -34,40 +54,68 @@ def convert_yolo_to_ls(
:param out_type: annotation type - "annotations" or "predictions"
:param image_root_url: root URL path where images will be hosted, e.g.: http://example.com/images
:param image_ext: image extension/s - single string or comma separated list to search, eg. .jpeg or .jpg, .png and so on.
:param yolo_type: label type - "rectanglelabels" or "polygonlabels"
:param image_dims: image dimensions - optional tuple of integers specifying the image width and height of *all* images in the dataset. Defaults to opening the image to determine its width and height, which is slower. This should only be used in the special case where your dataset has uniform image dimensions.
"""

tasks = []
logger.info(f'Preparing your {out_type} yolo dataset with {yolo_type} to import into LabelStudio')
logger.info('Reading YOLO notes and categories from %s', input_dir)

# build categories=>labels dict
notes_file = os.path.join(input_dir, 'classes.txt')
with open(notes_file) as f:
lines = [line.strip() for line in f.readlines()]
categories = {i: line for i, line in enumerate(lines)}
logger.info(f'Found {len(categories)} categories')
logger.info(f'Found {len(categories)} categories:')
_= [logger.info(f"\t{i}: {cat}") for i, cat in enumerate(categories.values())]


# generate and save labeling config
label_config_file = out_file.replace('.json', '') + '.label_config.xml'
poly_ops = {'stroke':'3', 'pointSize':'small', 'opacity':'0.2'}
generate_label_config(
categories,
{from_name: 'RectangleLabels'},
{from_name: 'RectangleLabels' if yolo_type == "rectanglelabels" else 'PolygonLabels','poly_ops':poly_ops},
to_name,
from_name,
label_config_file,
)

# define directories
labels_dir = os.path.join(input_dir, 'labels')
images_dir = os.path.join(input_dir, 'images')
logger.info('Converting labels from %s', labels_dir)
# retrieve data (image and label paths). handles datasets with data in subdirectories, e.g. train / val / test
images, labels, image_labels = get_data(input_dir, image_ext)
logger.info('Converting labels found recursively at %s', input_dir)
if yolo_type == 'polygonlabels':
# verify if current labels are boxes
# scan labels list for first non-empty label, peek contents, determine label type
for label in labels:
with open(labels[0]) as f:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be label not labels[0]

sample_lbl = [line.strip() for line in f.readlines()]
if len(sample_lbl) == 0:
continue
else:
break # non-empty label found
logger.info(f'sample label: {sample_lbl}')
if len(sample_lbl) < 7: # Polygons expected to consist of 7 items. At least three x,y pairs + class
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could be wrong here, but I believe that sample_lbl will contain a list of strings, each one being the line of the label file (where lines are separate objects). To test whether the sample label has less than 7 fields you should test something like if len(sample_lbl[0].split(' ')) < 7

logger.info('Your labels are bounding boxes, but you requested polygons. Transforming labels from bboxes to polygons')
polygonise_bboxes(input_dir, labels, out_type)


# build array out of provided comma separated image_extns (str -> array)
image_ext = [x.strip() for x in image_ext.split(",")]
logger.info(f'image extensions->, {image_ext}')

# x_scale = lambda x_prop: round(x_prop*image_width,1)
# y_scale = lambda y_prop: round((y_prop)*image_height,1)

# formatter functions (for percent values rel to 100)
x_scale = lambda x_prop: round(x_prop*100,2)
y_scale = lambda y_prop: round((y_prop)*100,2)

# loop through images
for f in os.listdir(images_dir):
for img in images:
f = Path(img).stem + Path(img).suffix
image_file_found_flag = False
for ext in image_ext:
if f.endswith(ext):
Expand All @@ -83,12 +131,13 @@ def convert_yolo_to_ls(
"data": {
# eg. '../../foo+you.py' -> '../../foo%2Byou.py'
"image": image_root_url
+ str(pathname2url(image_file))
+ str(pathname2url(image_file)),
"storage_filename": image_file
}
}

# define coresponding label file and check existence
label_file = os.path.join(labels_dir, image_file_base + '.txt')
label_file = image_labels[img]

if os.path.exists(label_file):
task[out_type] = [
Expand All @@ -101,40 +150,62 @@ def convert_yolo_to_ls(
# read image sizes
if image_dims is None:
# default to opening file if we aren't given image dims. slow!
with Image.open(os.path.join(images_dir, image_file)) as im:
with Image.open(img) as im:
image_width, image_height = im.size
else:
image_width, image_height = image_dims

with open(label_file) as file:
# convert all bounding boxes to Label Studio Results
lines = file.readlines()

for line in lines:
label_id, x, y, width, height = line.split()
x, y, width, height = (
float(x),
float(y),
float(width),
float(height),
)
item = {
"id": uuid.uuid4().hex[0:10],
"type": "rectanglelabels",
"value": {
if yolo_type == "rectanglelabels":
label_id, x, y, width, height = line.split()[:5]
conf = line.split()[-1] if out_type == 'predictions' else None
x, y, width, height = (
float(x),
float(y),
float(width),
float(height),
)
conf = float(conf) if conf is not None else None
value = {
"x": (x - width / 2) * 100,
"y": (y - height / 2) * 100,
"width": width * 100,
"height": height * 100,
"rotation": 0,
"rectanglelabels": [categories[int(label_id)]],
},
}

elif yolo_type == "polygonlabels":
parts = [float( part ) for part in line.split()]
label_id = int(parts.pop(0))
if out_type == 'predictions':
conf = parts.pop(-1)
xy_pairs = [ [x_scale(parts[i]), y_scale(parts[i+1])] for i in range(0,len(parts),2) ]

value = {
"points": xy_pairs,
"polygonlabels": [categories[int(label_id)]],
}

item = {
"id": uuid.uuid4().hex[0:10],
"type": yolo_type,
"value": value,
"to_name": to_name,
"from_name": from_name,
"image_rotation": 0,
"original_width": image_width,
"original_height": image_height,
}
if out_type == 'predictions':
item["score"] = conf
task[out_type][0]['result'].append(item)



tasks.append(task)

Expand All @@ -154,6 +225,43 @@ def convert_yolo_to_ls(
else:
logger.error('No labels converted')

def polygonise_bboxes(input_dir, labels, out_type):
    """Rewrite YOLO bounding-box label files as four-point polygon label files.

    Every box line ``class cx cy w h [conf]`` becomes
    ``class x0 y0 x1 y1 x2 y2 x3 y3 [conf]``, the four points being the box
    corners in the same normalised coordinates. Converted files are first
    written to ``<input_dir>/labels-seg``; afterwards ``<input_dir>/labels``
    is moved to ``<input_dir>/labels-old_boxes`` and ``labels-seg`` is moved
    to ``<input_dir>/labels`` so that the polygon labels become the default.

    :param input_dir: dataset root directory that contains ``labels``
    :param labels: paths of the label files to convert; files that are not
        located under ``<input_dir>/labels`` are skipped with a warning
    :param out_type: "annotations" or "predictions"; for predictions the last
        field of every line is carried over as the confidence value
    """
    log = logging.getLogger('root')
    labels_dir = Path(input_dir) / 'labels'
    poly_labels_dir = Path(input_dir) / 'labels-seg'
    os.makedirs(poly_labels_dir, exist_ok=True)
    labels_root = labels_dir.resolve()

    for label in labels:
        # Map the label onto the same relative location below labels-seg.
        # Resolving both sides keeps this independent of how the paths are
        # spelled (trailing slash, "./" prefix, relative vs. absolute).
        try:
            rel_pth = Path(label).resolve().relative_to(labels_root)
        except ValueError:
            log.warning('Skipping %s: not located under %s', label, labels_dir)
            continue
        poly_label_pth = poly_labels_dir / rel_pth
        os.makedirs(poly_label_pth.parent, exist_ok=True)

        with open(label, 'r') as lbl_f:
            boxes = [line.split() for line in lbl_f]

        poly_boxes = []
        for fields in boxes:
            if not fields:
                continue  # tolerate blank lines
            c, cx, cy, w, h = [float(n) for n in fields[:5]]
            # corners: bottom-left, top-left, top-right, bottom-right
            x0, y0 = (cx - (w / 2), cy + (h / 2))
            x1, y1 = (cx - (w / 2), cy - (h / 2))
            x2, y2 = (cx + (w / 2), cy - (h / 2))
            x3, y3 = (cx + (w / 2), cy + (h / 2))
            poly_box = f'{int(c)} {x0} {y0} {x1} {y1} {x2} {y2} {x3} {y3}'
            if out_type == 'predictions':
                # confidence is the last field of the current box line
                poly_box += f' {fields[-1]}'
            poly_boxes.append(poly_box)

        with open(poly_label_pth, 'w+') as plbl_f:
            plbl_f.write(''.join(f'{poly_box}\n' for poly_box in poly_boxes))

    # keep copy of original bboxes labels, and make polygon labels the default one
    shutil.move(str(labels_dir), f'{str(labels_dir)}-old_boxes')
    shutil.move(str(poly_labels_dir), str(labels_dir))

def add_parser(subparsers):
yolo = subparsers.add_parser('yolo')
Expand Down Expand Up @@ -192,6 +300,12 @@ def add_parser(subparsers):
help='annotation type - "annotations" or "predictions"',
default='annotations',
)
yolo.add_argument(
    '--yolo-type',
    dest='yolo_type',
    # Restrict to the two values convert_yolo_to_ls compares against, so a
    # typo is rejected by argparse instead of failing later in the converter.
    choices=['rectanglelabels', 'polygonlabels'],
    help='label type - "rectanglelabels" or "polygonlabels" ',
    # The default must be one of the accepted values; 'rectangles' matched
    # neither branch of the converter.
    default='rectanglelabels',
)
yolo.add_argument(
'--image-root-url',
dest='image_root_url',
Expand Down
4 changes: 3 additions & 1 deletion label_studio_converter/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,15 @@ def export(args):


def imports(args):
if args.import_format == 'yolo':
is_seg = 'seg' in args.import_format
if 'yolo' in args.import_format:
import_yolo.convert_yolo_to_ls(
input_dir=args.input,
out_file=args.output,
to_name=args.to_name,
from_name=args.from_name,
out_type=args.out_type,
yolo_type=args.yolo_type,
image_root_url=args.image_root_url,
image_ext=args.image_ext,
)
Expand Down
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/classes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
closed_door
opened_door
bus
number
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2 0.49765625 0.46200607902735563 0.9640625 0.8693009118541033
3 0.2890625 0.1656534954407295 0.071875 0.13677811550151975
0 0.43125 0.5319148936170213 0.121875 0.7051671732522796
0 0.790625 0.5030395136778115 0.090625 0.5866261398176292
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2 0.54921875 0.5761124121779859 0.7078125 0.585480093676815
3 0.59609375 0.33840749414519905 0.0390625 0.11943793911007025
0 0.503125 0.5971896955503513 0.05625 0.4309133489461358
0 0.303125 0.607728337236534 0.034375 0.3442622950819672
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
3 0.2828125 0.23055555555555557 0.0390625 0.06388888888888888
2 0.265625 0.40555555555555556 0.4203125 0.525
0 0.16796875 0.3972222222222222 0.034375 0.3111111111111111
0 0.085546875 0.38958333333333334 0.02734375 0.25972222222222224
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
3 0.371875 0.5637651821862348 0.034375 0.0708502024291498
0 0.4609375 0.7773279352226721 0.0625 0.3076923076923077
0 0.6421875 0.7975708502024291 0.05625 0.2874493927125506
2 0.51640625 0.6194331983805668 0.7046875 0.6558704453441295
3 changes: 3 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_5.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
2 0.5453125 0.5176470588235295 0.853125 0.7294117647058823
0 0.51875 0.5329411764705883 0.159375 0.5858823529411765
2 0.07421875 0.5035294117647059 0.1453125 0.6211764705882353
4 changes: 4 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_6.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
0 0.1765625 0.5354166666666667 0.209375 0.5763888888888888
1 0.831640625 0.5319444444444444 0.19453125 0.5472222222222223
2 0.499609375 0.5076388888888889 0.99140625 0.7736111111111111
3 0.325390625 0.4076388888888889 0.03671875 0.06805555555555555
2 changes: 2 additions & 0 deletions tests/data/test_import_yolo_seg_data/labels/img_7.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
2 0.56953125 0.5304449648711944 0.7578125 0.47540983606557374
3 0.24375 0.3805620608899297 0.03125 0.07259953161592506
Loading