Skip to content

Commit

Permalink
Merge pull request #271 from Labelbox/develop
Browse files Browse the repository at this point in the history
3.3.0
  • Loading branch information
msokoloff1 authored Sep 7, 2021
2 parents c33fc04 + e89b9aa commit c186ca9
Show file tree
Hide file tree
Showing 47 changed files with 1,146 additions and 247 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

# Version 3.3.0 (2021-09-02)
## Added
* `Dataset.create_data_rows_sync()` for synchronous bulk uploads of data rows
* `Model.delete()`, `ModelRun.delete()`, and `ModelRun.delete_annotation_groups()` to
clean up models, model runs, and annotation groups.

## Fix
* Increased timeout for label exports since projects with many segmentation masks weren't finishing quickly enough.

# Version 3.2.1 (2021-08-31)
## Fix
* Resolved an issue where `create_data_rows()` was not working on Amazon Linux
Expand Down
2 changes: 1 addition & 1 deletion labelbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = "labelbox"
__version__ = "3.2.1"
__version__ = "3.3.0"

from labelbox.schema.project import Project
from labelbox.client import Client
Expand Down
1 change: 1 addition & 0 deletions labelbox/data/annotation_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from .collection import LabelGenerator

from .metrics import ScalarMetric
from .metrics import MetricAggregation
3 changes: 1 addition & 2 deletions labelbox/data/annotation_types/annotation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import Any, Dict, List, Union

from pydantic.main import BaseModel

from .classification import Checklist, Dropdown, Radio, Text
from .feature import FeatureSchema
from .geometry import Geometry
Expand All @@ -16,6 +14,7 @@ class BaseAnnotation(FeatureSchema):

class ClassificationAnnotation(BaseAnnotation):
"""Class representing classification annotations (annotations that don't have a location) """

value: Union[Text, Checklist, Radio, Dropdown]


Expand Down
22 changes: 19 additions & 3 deletions labelbox/data/annotation_types/classification/classification.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
from typing import Any, Dict, List

from pydantic.main import BaseModel
try:
from typing import Literal
except:
from typing_extensions import Literal

from pydantic import BaseModel, validator
from ..feature import FeatureSchema


# TODO: Replace when pydantic adds support for unions that don't coerce types
class _TempName(BaseModel):
    """Base model carrying a discriminator ``name`` field used only to keep
    pydantic from coercing between union members; the field is stripped from
    serialized output."""
    name: str

    def dict(self, *args, **kwargs):
        serialized = super().dict(*args, **kwargs)
        del serialized['name']
        return serialized


class ClassificationAnswer(FeatureSchema):
"""
- Represents a classification option.
Expand All @@ -19,8 +33,9 @@ class Radio(BaseModel):
answer: ClassificationAnswer


class Checklist(BaseModel):
class Checklist(_TempName):
    """A classification in which any number of options may be selected."""
    name: Literal["checklist"] = "checklist"
    answer: List[ClassificationAnswer]


Expand All @@ -29,9 +44,10 @@ class Text(BaseModel):
answer: str


class Dropdown(BaseModel):
class Dropdown(_TempName):
    """A classification in which any number of options may be selected.

    NOTE: this type is not currently compatible with MAL.
    """
    name: Literal["dropdown"] = "dropdown"
    answer: List[ClassificationAnswer]
7 changes: 4 additions & 3 deletions labelbox/data/annotation_types/data/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def value(self) -> np.ndarray:
with open(self.file_path, "rb") as img:
im_bytes = img.read()
self.im_bytes = im_bytes
return self.bytes_to_np(im_bytes)
arr = self.bytes_to_np(im_bytes)
return arr
elif self.url is not None:
im_bytes = self.fetch_remote()
self.im_bytes = im_bytes
Expand All @@ -92,7 +93,7 @@ def value(self) -> np.ndarray:
def set_fetch_fn(self, fn):
object.__setattr__(self, 'fetch_remote', lambda: fn(self))

@retry.Retry(deadline=15.)
@retry.Retry(deadline=60.)
def fetch_remote(self) -> bytes:
"""
Method for accessing url.
Expand All @@ -104,7 +105,7 @@ def fetch_remote(self) -> bytes:
response.raise_for_status()
return response.content

@retry.Retry(deadline=15.)
@retry.Retry(deadline=30.)
def create_url(self, signer: Callable[[bytes], str]) -> str:
"""
Utility for creating a url from any of the other image representations.
Expand Down
4 changes: 3 additions & 1 deletion labelbox/data/annotation_types/label.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import defaultdict
from labelbox.data.annotation_types.metrics.scalar import ScalarMetric

from typing import Any, Callable, Dict, List, Union, Optional

Expand All @@ -21,7 +22,8 @@ class Label(BaseModel):
data: Union[VideoData, ImageData, TextData]
annotations: List[Union[ClassificationAnnotation, ObjectAnnotation,
VideoObjectAnnotation,
VideoClassificationAnnotation, ScalarMetric]] = []
VideoClassificationAnnotation, ScalarMetric,
ScalarMetric]] = []
extra: Dict[str, Any] = {}

def object_annotations(self) -> List[ObjectAnnotation]:
Expand Down
9 changes: 0 additions & 9 deletions labelbox/data/annotation_types/metrics.py

This file was deleted.

2 changes: 2 additions & 0 deletions labelbox/data/annotation_types/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .scalar import ScalarMetric
from .aggregations import MetricAggregation
8 changes: 8 additions & 0 deletions labelbox/data/annotation_types/metrics/aggregations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class MetricAggregation(Enum):
    """Supported strategies for aggregating scalar metric values."""
    ARITHMETIC_MEAN = "ARITHMETIC_MEAN"
    GEOMETRIC_MEAN = "GEOMETRIC_MEAN"
    HARMONIC_MEAN = "HARMONIC_MEAN"
    SUM = "SUM"
26 changes: 26 additions & 0 deletions labelbox/data/annotation_types/metrics/scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from labelbox.data.annotation_types.metrics.aggregations import MetricAggregation
from typing import Any, Dict, Optional
from pydantic import BaseModel


class ScalarMetric(BaseModel):
    """A single scalar (float) metric value.

    For backwards compatibility, ``metric_name`` is optional, but omitting it
    is deprecated: the editor will fall back to a default name, and
    ``aggregation`` is ignored (dropped from the serialized form) when no
    ``metric_name`` is provided.
    """
    value: float
    metric_name: Optional[str] = None
    feature_name: Optional[str] = None
    subclass_name: Optional[str] = None
    aggregation: MetricAggregation = MetricAggregation.ARITHMETIC_MEAN
    extra: Dict[str, Any] = {}

    def dict(self, *args, **kwargs):
        # Serialize via pydantic, then drop `aggregation` when no metric_name
        # is set (it would be ignored downstream) and strip all None fields.
        res = super().dict(*args, **kwargs)
        if res['metric_name'] is None:
            res.pop('aggregation')
        return {k: v for k, v in res.items() if v is not None}
160 changes: 160 additions & 0 deletions labelbox/data/metrics/group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""
Tools for grouping features and labels so that we can compute metrics on the individual groups
"""
from collections import defaultdict
from typing import Dict, List, Tuple, Union
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal

from labelbox.data.annotation_types import Label
from labelbox.data.annotation_types.collection import LabelList
from labelbox.data.annotation_types.feature import FeatureSchema


def get_identifying_key(
    features_a: List[FeatureSchema], features_b: List[FeatureSchema]
) -> Union[Literal['name'], Literal['feature_schema_id']]:
    """
    Determines which identifying field is consistently populated across both
    feature collections. This can either be the feature name or the feature
    schema id.

    Args:
        features_a : List of FeatureSchemas (usually ObjectAnnotations or ClassificationAnnotations)
        features_b : List of FeatureSchemas (usually ObjectAnnotations or ClassificationAnnotations)
    Returns:
        The field name ('name' or 'feature_schema_id') present in both lists.
    Raises:
        ValueError: If neither field is fully populated across both lists.
    """
    schema_ids_a, names_a = all_have_key(features_a)
    if not (schema_ids_a or names_a):
        raise ValueError("All data must have feature_schema_ids or names set")

    schema_ids_b, names_b = all_have_key(features_b)

    # Prefer names because they are human readable; fall back to schema ids
    # when names aren't fully populated on both sides.
    if names_a and names_b:
        return 'name'
    if schema_ids_a and schema_ids_b:
        return 'feature_schema_id'
    raise ValueError(
        "Ground truth and prediction annotations must have set all name or feature ids. "
        "Otherwise there is no key to match on. Please update.")


def all_have_key(features: List[FeatureSchema]) -> Tuple[bool, bool]:
    """
    Reports whether every feature has ``feature_schema_id`` set and whether
    every feature has ``name`` set.

    Args:
        features (List[FeatureSchema]) : features to inspect
    Returns:
        Tuple of (all feature_schema_ids set, all names set). An empty list
        yields (True, True).
    """
    has_all_schemas = all(f.feature_schema_id is not None for f in features)
    has_all_names = all(f.name is not None for f in features)
    return has_all_schemas, has_all_names


def get_label_pairs(labels_a: LabelList,
                    labels_b: LabelList,
                    match_on="uid",
                    filter=False) -> Dict[str, Tuple[Label, Label]]:
    """
    Pairs up labels from two collections by their data row identifier.

    This assumes the data row `uid` or `external_id` was provided by the
    user. These fields are not required and can be empty; if that assumption
    fails, the user has to determine their own matching strategy.

    Args:
        labels_a (LabelList): A collection of labels to match with labels_b
        labels_b (LabelList): A collection of labels to match with labels_a
        match_on ('uid' or 'external_id'): The data row key to match labels by.
        filter (bool): Whether or not to ignore mismatches
    Returns:
        A dict keyed by the union of all uids (or external ids), whose values
        contain the matched `[label_a, label_b]` pair.
    Raises:
        ValueError: If `match_on` is invalid, a label lacks the key, or
            (when `filter` is False) a key exists in only one collection.
    """

    if match_on not in ['uid', 'external_id']:
        # Fixed typo in this message ("exteranl_id" -> "external_id").
        raise ValueError("Can only match on `uid` or `external_id`.")

    label_lookup_a = {
        getattr(label.data, match_on, None): label for label in labels_a
    }
    label_lookup_b = {
        getattr(label.data, match_on, None): label for label in labels_b
    }
    all_keys = set(label_lookup_a.keys()).union(label_lookup_b.keys())
    # A None key means at least one label's data row is missing the
    # `match_on` field, so a reliable pairing cannot be determined.
    if None in label_lookup_a or None in label_lookup_b:
        raise ValueError(
            f"One or more of the labels has a data row without the required key {match_on}."
            " It cannot be determined which labels match without this information."
            f" Either assign {match_on} to each Label or create your own pairing function."
        )
    pairs = defaultdict(list)
    for key in all_keys:
        a, b = label_lookup_a.pop(key, None), label_lookup_b.pop(key, None)
        if a is None or b is None:
            if not filter:
                raise ValueError(
                    f"{match_on} {key} is not available in both LabelLists. "
                    "Set `filter = True` to filter out these examples, assign the ids manually, or create your own matching function."
                )
            else:
                continue
        pairs[key].append([a, b])
    return pairs


def get_feature_pairs(
    features_a: List[FeatureSchema], features_b: List[FeatureSchema]
) -> Dict[str, Tuple[List[FeatureSchema], List[FeatureSchema]]]:
    """
    Matches features between the two lists by their shared identifying key
    (name or feature_schema_id, whichever is set on both sides).

    Args:
        labels_a (List[FeatureSchema]): A list of features to match with features_b
        labels_b (List[FeatureSchema]): A list of features to match with features_a
    Returns:
        The matched features as a dict. The key is the feature identifier and
        the value contains two lists, one with the matching features from
        each input list.
    """
    identifying_key = get_identifying_key(features_a, features_b)
    lookup_a = _create_feature_lookup(features_a, identifying_key)
    lookup_b = _create_feature_lookup(features_b, identifying_key)

    matched = defaultdict(list)
    for identifier in set(lookup_a.keys()).union(set(lookup_b.keys())):
        matched[identifier].extend([lookup_a[identifier], lookup_b[identifier]])
    return matched


def _create_feature_lookup(features: List[FeatureSchema],
key: str) -> Dict[str, List[FeatureSchema]]:
"""
Groups annotation by name (if available otherwise feature schema id).
Args:
annotations: List of annotations to group
Returns:
a dict where each key is the feature_schema_id (or name)
and the value is a list of annotations that have that feature_schema_id (or name)
"""
grouped_features = defaultdict(list)
for feature in features:
grouped_features[getattr(feature, key)].append(feature)
return grouped_features
2 changes: 2 additions & 0 deletions labelbox/data/metrics/iou/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .calculation import *
from .iou import *
Loading

0 comments on commit c186ca9

Please sign in to comment.