Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chongyangbai/multilabel2kvp #86

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,13 @@ print(target)
You can convert an existing IC/OD VisionDataset to the generalized KVP format using one of the following adapters:

```{python}
# For IC dataset
from vision_datasets.image_classification import ClassificationAsKeyValuePairDataset
sample_ic_dataset = VisionDataset(dataset_info, dataset_manifest)
kvp_dataset = ClassificationAsKeyValuePairDataset(sample_ic_dataset)
# For MultiClass and MultiLabel IC dataset
from vision_datasets.image_classification import MultiClassAsKeyValuePairDataset, MultiLabelAsKeyValuePairDataset
sample_multiclass_ic_dataset = VisionDataset(dataset_info, dataset_manifest)
kvp_dataset = MultiClassAsKeyValuePairDataset(sample_multiclass_ic_dataset)
sample_multilabel_ic_dataset = VisionDataset(dataset_info, dataset_manifest)
kvp_dataset = MultiLabelAsKeyValuePairDataset(sample_multilabel_ic_dataset)


# For OD dataset
from vision_datasets.image_object_detection import DetectionAsKeyValuePairDataset
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import setuptools
from os import path

VERSION = '1.0.15'
VERSION = '1.0.16'

# Get the long description from the README file
here = path.abspath(path.dirname(__file__))
Expand Down
67 changes: 52 additions & 15 deletions tests/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import json
import pathlib
import tempfile

from PIL import Image

Expand Down Expand Up @@ -43,22 +42,21 @@ def create_an_od_manifest(root_dir='', n_images=2, n_categories=4):
return CocoManifestAdaptorFactory.create(DatasetTypes.IMAGE_OBJECT_DETECTION).create_dataset_manifest(coco_path.name, root_dir)

@staticmethod
def create_an_od_dataset(n_images=2, n_categories=4, coordinates='relative'):
def create_an_od_dataset(tempdir, n_images=2, n_categories=4, coordinates='relative'):
dataset_dict = copy.deepcopy(DetectionTestFixtures.DATASET_INFO_DICT)

tempdir = tempfile.TemporaryDirectory()
dataset_dict['root_folder'] = tempdir.name
dataset_dict['root_folder'] = tempdir
dataset_dict['type'] = 'object_detection'
for i in range(n_images):
Image.new('RGB', (100, 100)).save(pathlib.Path(tempdir.name) / f'{i + 1}.jpg')
Image.new('RGB', (100, 100)).save(pathlib.Path(tempdir) / f'{i + 1}.jpg')

dataset_info = DatasetInfo(dataset_dict)
dataset_manifest = DetectionTestFixtures.create_an_od_manifest(tempdir.name, n_images, n_categories)
dataset_manifest = DetectionTestFixtures.create_an_od_manifest(tempdir, n_images, n_categories)
dataset = VisionDataset(dataset_info, dataset_manifest, coordinates)
return dataset, tempdir
return dataset


class MultilcassClassificationTestFixtures:
class MulticlassClassificationTestFixtures:
DATASET_INFO_DICT = {
"name": "dummy",
"version": 1,
Expand All @@ -74,17 +72,16 @@ class MultilcassClassificationTestFixtures:
}

@staticmethod
def create_an_ic_dataset(n_images=2, n_categories=3):
dataset_dict = copy.deepcopy(MultilcassClassificationTestFixtures.DATASET_INFO_DICT)
tempdir = tempfile.TemporaryDirectory()
dataset_dict['root_folder'] = tempdir.name
def create_an_ic_dataset(tempdir, n_images=2, n_categories=3):
dataset_dict = copy.deepcopy(MulticlassClassificationTestFixtures.DATASET_INFO_DICT)
dataset_dict['root_folder'] = tempdir
for i in range(n_images):
Image.new('RGB', (100, 100)).save(pathlib.Path(tempdir.name) / f'{i + 1}.jpg')
Image.new('RGB', (100, 100)).save(pathlib.Path(tempdir) / f'{i + 1}.jpg')

dataset_info = DatasetInfo(dataset_dict)
dataset_manifest = MultilcassClassificationTestFixtures.create_an_ic_manifest(tempdir.name, n_images, n_categories)
dataset_manifest = MulticlassClassificationTestFixtures.create_an_ic_manifest(tempdir, n_images, n_categories)
dataset = VisionDataset(dataset_info, dataset_manifest)
return dataset, tempdir
return dataset

@staticmethod
def create_an_ic_manifest(root_dir='', n_images=2, n_categories=3):
Expand All @@ -98,3 +95,43 @@ def create_an_ic_manifest(root_dir='', n_images=2, n_categories=3):
coco_path = pathlib.Path(root_dir) / 'coco.json'
coco_path.write_text(json.dumps(coco_dict))
return CocoManifestAdaptorFactory.create(DatasetTypes.IMAGE_CLASSIFICATION_MULTICLASS).create_dataset_manifest(coco_path.name, root_dir)


class MultilabelClassificationTestFixtures:
    """Factory helpers that build a tiny multilabel image-classification dataset for tests.

    The generated data consists of blank 100x100 RGB JPEG files plus a COCO-style
    ``coco.json`` in which every image carries two annotations.
    """

    # Template dataset description; ``root_folder`` is overwritten per dataset.
    DATASET_INFO_DICT = {
        "name": "dummy",
        "version": 1,
        "type": "image_classification_multilabel",
        "root_folder": "dummy",
        "format": "coco",
        "test": {
            "index_path": "train.json",
            "files_for_local_usage": [
                "train.zip"
            ]
        },
    }

    @staticmethod
    def create_an_ic_dataset(tempdir, n_images=2, n_categories=3):
        """Write dummy images into ``tempdir`` and return a ``VisionDataset`` over them.

        Args:
            tempdir: path of an existing directory that receives the images and ``coco.json``.
            n_images: number of blank images to generate (named ``1.jpg`` .. ``{n_images}.jpg``).
            n_categories: number of categories declared in the manifest.
        """
        fixtures = MultilabelClassificationTestFixtures

        # Work on a copy so the shared class-level template is never mutated.
        info_dict = copy.deepcopy(fixtures.DATASET_INFO_DICT)
        info_dict['root_folder'] = tempdir

        image_dir = pathlib.Path(tempdir)
        for image_id in range(1, n_images + 1):
            Image.new('RGB', (100, 100)).save(image_dir / f'{image_id}.jpg')

        info = DatasetInfo(info_dict)
        manifest = fixtures.create_an_ic_manifest(tempdir, n_images, n_categories)
        return VisionDataset(info, manifest)

    @staticmethod
    def create_an_ic_manifest(root_dir='', n_images=2, n_categories=3):
        """Write ``coco.json`` under ``root_dir`` and load it as a multilabel manifest.

        Image ``k`` (1-based) is annotated with category ``k`` and with category
        ``n_images - k + 1``. Category ids are therefore derived from ``n_images``,
        not from ``n_categories``.
        """
        image_ids = range(1, n_images + 1)

        images = [
            {'id': image_id, 'file_name': f'{image_id}.jpg', 'width': 100, 'height': 100}
            for image_id in image_ids
        ]
        categories = [
            {'id': category_id, 'name': f'{category_id}-class'}
            for category_id in range(1, n_categories + 1)
        ]

        # First label per image: category id equals the image id.
        primary = [
            {'id': image_id, 'image_id': image_id, 'category_id': image_id}
            for image_id in image_ids
        ]
        # Second label per image: the same range of ids walked in reverse order.
        secondary = [
            {'id': n_images + image_id, 'image_id': image_id, 'category_id': n_images - image_id + 1}
            for image_id in image_ids
        ]

        coco_dict = {'images': images, 'categories': categories, 'annotations': primary + secondary}
        coco_path = pathlib.Path(root_dir) / 'coco.json'
        coco_path.write_text(json.dumps(coco_dict))

        adaptor = CocoManifestAdaptorFactory.create(DatasetTypes.IMAGE_CLASSIFICATION_MULTILABEL)
        return adaptor.create_dataset_manifest(coco_path.name, root_dir)
93 changes: 63 additions & 30 deletions tests/test_ic_od_to_kvp_wrapper/test_classification_as_kvp.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,74 @@
import tempfile
import unittest

from tests.test_fixtures import MultilcassClassificationTestFixtures
from tests.test_fixtures import MulticlassClassificationTestFixtures, MultilabelClassificationTestFixtures
from vision_datasets.common import DatasetTypes
from vision_datasets.image_classification import ClassificationAsKeyValuePairDataset
from vision_datasets.image_classification import MultiClassAsKeyValuePairDataset, MultiLabelAsKeyValuePairDataset
from vision_datasets.key_value_pair.manifest import KeyValuePairLabelManifest


class TestClassificationAsKeyValuePairDataset(unittest.TestCase):
def test_multiclass_classification(self):
sample_classification_dataset, _ = MultilcassClassificationTestFixtures.create_an_ic_dataset()
kvp_dataset = ClassificationAsKeyValuePairDataset(sample_classification_dataset)

self.assertIsInstance(kvp_dataset, ClassificationAsKeyValuePairDataset)
self.assertEqual(kvp_dataset.dataset_info.type, DatasetTypes.KEY_VALUE_PAIR)
self.assertIn("name", kvp_dataset.dataset_info.schema)
self.assertIn("description", kvp_dataset.dataset_info.schema)
self.assertIn("fieldSchema", kvp_dataset.dataset_info.schema)

print(kvp_dataset.dataset_info.schema["fieldSchema"])

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"],
{"className": {
"type": "string",
"description": "Class name that the image belongs to.",
"classes": {
"1-class": {"description": "A single class name. Only output 1-class as the class name if present."},
"2-class": {"description": "A single class name. Only output 2-class as the class name if present."},
"3-class": {"description": "A single class name. Only output 3-class as the class name if present."},
}
}
})

_, target, _ = kvp_dataset[0]
self.assertIsInstance(target, KeyValuePairLabelManifest)
self.assertEqual(target.label_data,
{"fields": {"className": {"value": "1-class"}}}
)
with tempfile.TemporaryDirectory() as tempdir:
sample_classification_dataset = MulticlassClassificationTestFixtures.create_an_ic_dataset(tempdir)
kvp_dataset = MultiClassAsKeyValuePairDataset(sample_classification_dataset)

self.assertIsInstance(kvp_dataset, MultiClassAsKeyValuePairDataset)
self.assertEqual(kvp_dataset.dataset_info.type, DatasetTypes.KEY_VALUE_PAIR)
self.assertIn("name", kvp_dataset.dataset_info.schema)
self.assertIn("description", kvp_dataset.dataset_info.schema)
self.assertIn("fieldSchema", kvp_dataset.dataset_info.schema)

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"],
{"className": {
"type": "string",
"description": "Class name that the image belongs to.",
"classes": {
"1-class": {"description": "A single class name. Only output 1-class as the class name if present."},
"2-class": {"description": "A single class name. Only output 2-class as the class name if present."},
"3-class": {"description": "A single class name. Only output 3-class as the class name if present."},
}
}
})

_, target, _ = kvp_dataset[0]
self.assertIsInstance(target, KeyValuePairLabelManifest)
self.assertEqual(target.label_data,
{"fields": {"className": {"value": "1-class"}}}
)

def test_multilabel_classification(self):
with tempfile.TemporaryDirectory() as tempdir:
sample_classification_dataset = MultilabelClassificationTestFixtures.create_an_ic_dataset(tempdir, n_images=2, n_categories=2)
kvp_dataset = MultiLabelAsKeyValuePairDataset(sample_classification_dataset)

self.assertIsInstance(kvp_dataset, MultiLabelAsKeyValuePairDataset)
self.assertEqual(kvp_dataset.dataset_info.type, DatasetTypes.KEY_VALUE_PAIR)
self.assertIn("name", kvp_dataset.dataset_info.schema)
self.assertIn("description", kvp_dataset.dataset_info.schema)
self.assertIn("fieldSchema", kvp_dataset.dataset_info.schema)

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"],
{'classNames': {
'type': 'array',
'description': 'Class names that the image belongs to.',
'items': {
'type': 'string',
'classes': {
'1-class': {'description': 'A single class name. Only output 1-class as the class name if present.'},
'2-class': {'description': 'A single class name. Only output 2-class as the class name if present.'}
}
}
}
}
)

_, target, _ = kvp_dataset[0]
self.assertIsInstance(target, KeyValuePairLabelManifest)
self.assertEqual(target.label_data,
{'fields': {
'classNames': {'value': ['1-class', '2-class']}}
})


if __name__ == '__main__':
Expand Down
59 changes: 31 additions & 28 deletions tests/test_ic_od_to_kvp_wrapper/test_detection_as_kvp.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tempfile
import unittest

from tests.test_fixtures import DetectionTestFixtures
Expand All @@ -8,37 +9,39 @@

class TestDetectionAsKeyValuePairDataset(unittest.TestCase):
def test_detection_to_kvp(self):
sample_detection_dataset, _ = DetectionTestFixtures.create_an_od_dataset()
kvp_dataset = DetectionAsKeyValuePairDataset(sample_detection_dataset)

self.assertIsInstance(kvp_dataset, DetectionAsKeyValuePairDataset)
self.assertEqual(kvp_dataset.dataset_info.type, DatasetTypes.KEY_VALUE_PAIR)
self.assertIn("name", kvp_dataset.dataset_info.schema)
self.assertIn("description", kvp_dataset.dataset_info.schema)
self.assertIn("fieldSchema", kvp_dataset.dataset_info.schema)

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"],
{'detectedObjects': {'type': 'array', 'description': 'Objects in the image of the specified classes, with bounding boxes',
'items': {'type': 'string', 'description': 'Class name of the object',
'classes': {'1-class': {},
'2-class': {},
'3-class': {},
'4-class': {}},
'includeGrounding': True}}})

_, target, _ = kvp_dataset[0]
self.assertIsInstance(target, KeyValuePairLabelManifest)
self.assertEqual(target.label_data,
{'fields': {'detectedObjects': {'value': [{'value': '1-class', 'groundings': [[0, 0, 100, 100]]},
{'value': '2-class', 'groundings': [[10, 10, 50, 100]]}]}}
})
with tempfile.TemporaryDirectory() as tempdir:
sample_detection_dataset = DetectionTestFixtures.create_an_od_dataset(tempdir)
kvp_dataset = DetectionAsKeyValuePairDataset(sample_detection_dataset)

self.assertIsInstance(kvp_dataset, DetectionAsKeyValuePairDataset)
self.assertEqual(kvp_dataset.dataset_info.type, DatasetTypes.KEY_VALUE_PAIR)
self.assertIn("name", kvp_dataset.dataset_info.schema)
self.assertIn("description", kvp_dataset.dataset_info.schema)
self.assertIn("fieldSchema", kvp_dataset.dataset_info.schema)

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"],
{'detectedObjects': {'type': 'array', 'description': 'Objects in the image of the specified classes, with bounding boxes',
'items': {'type': 'string', 'description': 'Class name of the object',
'classes': {'1-class': {},
'2-class': {},
'3-class': {},
'4-class': {}},
'includeGrounding': True}}})

_, target, _ = kvp_dataset[0]
self.assertIsInstance(target, KeyValuePairLabelManifest)
self.assertEqual(target.label_data,
{'fields': {'detectedObjects': {'value': [{'value': '1-class', 'groundings': [[0, 0, 100, 100]]},
{'value': '2-class', 'groundings': [[10, 10, 50, 100]]}]}}
})

def test_single_class_description(self):
sample_detection_dataset, _ = DetectionTestFixtures.create_an_od_dataset(n_categories=1)
kvp_dataset = DetectionAsKeyValuePairDataset(sample_detection_dataset)
with tempfile.TemporaryDirectory() as tempdir:
sample_detection_dataset = DetectionTestFixtures.create_an_od_dataset(tempdir, n_categories=1)
kvp_dataset = DetectionAsKeyValuePairDataset(sample_detection_dataset)

self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"]['detectedObjects']['items']['classes'],
{'1-class': {"description": "Always output 1-class as the class."}})
self.assertEqual(kvp_dataset.dataset_info.schema["fieldSchema"]['detectedObjects']['items']['classes'],
{'1-class': {"description": "Always output 1-class as the class."}})


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions vision_datasets/image_classification/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .coco_manifest_adaptor import MultiClassClassificationCocoManifestAdaptor, MultiLabelClassificationCocoManifestAdaptor
from .operations import ImageClassificationCocoDictGenerator
from .manifest import ImageClassificationLabelManifest
from .classification_as_kvp_dataset import ClassificationAsKeyValuePairDataset
from .classification_as_kvp_dataset import MultiClassAsKeyValuePairDataset, MultiLabelAsKeyValuePairDataset

__all__ = ['MultiClassClassificationCocoManifestAdaptor', 'MultiLabelClassificationCocoManifestAdaptor',
'ImageClassificationCocoDictGenerator',
'ImageClassificationLabelManifest',
'ClassificationAsKeyValuePairDataset']
'MultiClassAsKeyValuePairDataset', 'MultiLabelAsKeyValuePairDataset']
Loading
Loading