Skip to content

Commit

Permalink
change format for grounding
Browse files Browse the repository at this point in the history
  • Loading branch information
pj-ms committed Aug 31, 2023
1 parent a340496 commit 260d783
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 16 deletions.
4 changes: 2 additions & 2 deletions COCO_DATA_FORMAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ This is Visual Question & Answering with bboxes in the groundtruth.
{"id": 2, "zip_file": "test2.zip", "file_name": "test/1/image_2.jpg"}
],
"annotations": [
{"image_id": 1, "id": 1, "question": "whats animal are in the image?", "answer": [{"text": "a cat", "bbox": [10, 10, 100, 100]}, {"text": "a bird", "bbox": [15, 15, 30, 30]}]},
{"image_id": 2, "id": 2, "question": "What is the title of the book on the shelf?", "answer": [{"text": "a cat", "bbox": [10, 10, 100, 100]}]}
{"image_id": 1, "id": 1, "question": "what animals are in the image?", "answer": "cat and bird", "grounding": [{"text": "a cat", "bbox": [10, 10, 100, 100]}, {"text": "a bird", "bbox": [15, 15, 30, 30]}]},
{"image_id": 2, "id": 2, "question": "What is the title and author of the book on the shelf?", "answer": "Title is Baking and author is John", "grounding": [{"text": "Title: Baking", "bbox": [10, 10, 100, 100]}, {"text": "Author: John", "bbox": [0, 0, 50, 50]}]}
]
}
```
Expand Down
38 changes: 33 additions & 5 deletions tests/resources/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,18 +315,46 @@ class VisualObjectGroundingTestCases:
{"id": 1, "file_name": "test1.zip@test/0/image_1.jpg"}, {"id": 2, "file_name": "test2.zip@test/1/image_2.jpg"}
],
"annotations": [
{"image_id": 1, "id": 1, "question": "where are the apples", "answer": [{"text": "left top corner", "bbox": [0, 10, 10, 10]}]},
{"image_id": 2, "id": 2, "question": "where are the banana", "answer": [{"text": "right bottom corner", "bbox": [90, 90, 10, 10]}]},
{
"image_id": 1,
"id": 1,
"question": "where are the apples",
"answer": "who knows",
"grounding": [{"text": "left top corner", "bbox": [0, 10, 10, 10]}]},
{
"image_id": 2,
"id": 2,
"question": "where are the banana",
"answer": "check the grounding",
"grounding": [{"text": "right bottom corner", "bbox": [90, 90, 10, 10]}]
},
]
},
{
"images": [
{"id": 1, "file_name": "test1.zip@test/0/image_1.jpg"}, {"id": 2, "file_name": "test2.zip@test/1/image_2.jpg"}, {"id": 3, "file_name": "test2.zip@test/1/image_3.jpg"}
],
"annotations": [
{"image_id": 1, "id": 1, "question": "Describe the image", "answer": [{"text": "left top corner", "bbox": [0, 10, 10, 10]}, {"text": "right bottom corner", "bbox": [90, 90, 10, 10]}]},
{"image_id": 2, "id": 2, "question": "where is an banana", "answer": [{"text": "mid of the image", "bbox": [50, 50, 10, 10]}]},
{"image_id": 2, "id": 3, "question": "describe the top half of the image", "answer": [{"text": "Sun rise", "bbox": [0, 0, 100, 50]}]},
{
"image_id": 1,
"id": 1,
"question": "Describe the image",
"answer": "many books",
"grounding": [{"text": "20 books", "bbox": [0, 10, 10, 10]}, {"text": "10 books", "bbox": [90, 90, 10, 10]}]},
{
"image_id": 2,
"id": 2,
"question": "where is an banana",
"answer": "present in the image",
"grounding": [{"text": "mid of the image", "bbox": [50, 50, 10, 10]}]
},
{
"image_id": 2,
"id": 3,
"question": "describe the top half of the image",
"answer": "ok",
"grounding": [{"text": "Sun rise", "bbox": [0, 0, 100, 50]}]
},
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ def __init__(self) -> None:
super().__init__(DatasetTypes.VISUAL_OBJECT_GROUNDING)

def process_label(self, image: ImageDataManifest, annotation: dict, coco_manifest: dict):
image.labels.append(VisualObjectGroundingLabelManifest({"question": annotation['question'], "answer": annotation['answer']},
additional_info=self._get_additional_info(annotation, {'id', 'question', 'answer'})))
image.labels.append(VisualObjectGroundingLabelManifest({'question': annotation['question'], 'answer': annotation['answer'], 'grounding': annotation['grounding']},
additional_info=self._get_additional_info(annotation, {'id', 'question', 'answer', 'grounding'})))
21 changes: 14 additions & 7 deletions vision_datasets/visual_object_grounding/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ..common import ImageLabelManifest


class GroundingAnswer:
class Grounding:
def __init__(self, label_data: dict):
self._label_data = label_data

Expand All @@ -17,24 +17,31 @@ def bbox(self):

class VisualObjectGroundingLabelManifest(ImageLabelManifest):
"""
{"question": "a question about the image", "answer": [{"text": " in text", "bbox": [left, top, right, bottom]}, ...]}
{"question": "a question about the image", "answer": "generic caption or answer to the question", "grounding": [{"text": "....", "bbox": [left, top, right, bottom]}, ...]}
"""

def _read_label_data(self):
raise NotImplementedError

def _check_label(self, label_data):
if label_data is None or "question" not in label_data or "answer" not in label_data:
def is_present(key):
return key in label_data and label_data[key] is not None

if label_data is None or any(not is_present(key) for key in ['question', 'answer', 'grounding']):
raise ValueError

for ans in label_data["answer"]:
if "text" not in ans or "bbox" not in ans or len(ans['bbox']) != 4:
for grounding in label_data["grounding"]:
if "text" not in grounding or "bbox" not in grounding or len(grounding['bbox']) != 4:
raise ValueError

@property
def question(self) -> str:
return self._label_data["question"]

@property
def answer(self) -> List[GroundingAnswer]:
return [GroundingAnswer(x) for x in self._label_data["answer"]]
def answer(self) -> str:
return self._label_data["answer"]

@property
def grounding(self) -> List[Grounding]:
return [Grounding(x) for x in self._label_data["grounding"]]

0 comments on commit 260d783

Please sign in to comment.