Skip to content

Commit

Permalink
Merge pull request #80 from scaleapi/da-escape-chars
Browse files Browse the repository at this point in the history
Escape Characters
  • Loading branch information
ardila authored Jun 28, 2021
2 parents eaa1e87 + 7fc9fec commit a1f87ef
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 3 deletions.
6 changes: 6 additions & 0 deletions nucleus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import json
import logging
import os
import urllib.request
from typing import Any, Dict, List, Optional, Union

import aiohttp
Expand All @@ -62,6 +63,8 @@
import tqdm
import tqdm.notebook as tqdm_notebook

from nucleus.url_utils import sanitize_string_args

from .annotation import (
BoxAnnotation,
PolygonAnnotation,
Expand Down Expand Up @@ -300,6 +303,7 @@ def delete_dataset(self, dataset_id: str) -> dict:
"""
return self.make_request({}, f"dataset/{dataset_id}", requests.delete)

@sanitize_string_args
def delete_dataset_item(
self, dataset_id: str, item_id: str = None, reference_id: str = None
) -> dict:
Expand Down Expand Up @@ -862,6 +866,7 @@ def model_run_info(self, model_run_id: str):
{}, f"modelRun/{model_run_id}/info", requests.get
)

@sanitize_string_args
def dataitem_ref_id(self, dataset_id: str, reference_id: str):
"""
:param dataset_id: internally controlled dataset id
Expand All @@ -872,6 +877,7 @@ def dataitem_ref_id(self, dataset_id: str, reference_id: str):
{}, f"dataset/{dataset_id}/refloc/{reference_id}", requests.get
)

@sanitize_string_args
def predictions_ref_id(self, model_run_id: str, ref_id: str):
"""
Returns Model Run info For Dataset Item by model_run_id and item reference_id.
Expand Down
3 changes: 2 additions & 1 deletion nucleus/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import requests

from nucleus.job import AsyncJob
from nucleus.url_utils import sanitize_string_args
from nucleus.utils import (
convert_export_payload,
format_dataset_item_response,
Expand Down Expand Up @@ -35,7 +36,6 @@
)
from .payload_constructor import construct_model_run_creation_payload


WARN_FOR_LARGE_UPLOAD = 50000


Expand Down Expand Up @@ -83,6 +83,7 @@ def size(self) -> int:
def items(self) -> List[DatasetItem]:
return self._client.get_dataset_items(self.id)

@sanitize_string_args
def autotag_scores(self, autotag_name, for_scores_greater_than=0):
"""Export the autotag scores above a threshold, largest scores first.
Expand Down
22 changes: 22 additions & 0 deletions nucleus/url_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import functools
import urllib.parse
import urllib.request


def sanitize_field(field):
    """Percent-encode *field* so it can be embedded in a URL path segment.

    ``safe=""`` means every reserved character is escaped — including "/",
    so reference ids containing slashes do not break the request route.

    :param field: string to encode (encoded as UTF-8 before quoting)
    :return: percent-encoded string
    """
    # urllib.parse is the documented home of quote(); urllib.request only
    # happens to re-export it, which is not a stable contract.
    return urllib.parse.quote(field.encode("UTF-8"), safe="")


def sanitize_string_args(function):
    """Decorator: percent-encode every string positional and keyword argument.

    Non-string arguments pass through unchanged. Applied to client methods
    that interpolate user-supplied ids into URL paths, so characters such as
    "/" in a reference id cannot change the request route.

    :param function: callable to wrap
    :return: wrapped callable with identical signature and metadata
    """

    # functools.wraps preserves __name__/__doc__ of the wrapped method so
    # introspection and generated docs still see the original function.
    @functools.wraps(function)
    def sanitized_function(*args, **kwargs):
        sanitized_args = [
            sanitize_field(arg) if isinstance(arg, str) else arg
            for arg in args
        ]
        sanitized_kwargs = {
            key: sanitize_field(value) if isinstance(value, str) else value
            for key, value in kwargs.items()
        }
        return function(*sanitized_args, **sanitized_kwargs)

    return sanitized_function
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ exclude = '''

[tool.poetry]
name = "scale-nucleus"
version = "0.1.10"
version = "0.1.11"
description = "The official Python client library for Nucleus, the Data Platform for AI"
license = "MIT"
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
Expand Down
15 changes: 14 additions & 1 deletion tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
TEST_IMG_URLS,
TEST_POLYGON_ANNOTATIONS,
TEST_SEGMENTATION_ANNOTATIONS,
TEST_SLICE_NAME,
reference_id_from_url,
)

Expand Down Expand Up @@ -344,6 +343,20 @@ def test_annotate_async_with_error(dataset: Dataset):
assert "Item with id fake_garbage doesn" in str(job.errors())


def test_append_with_special_chars(dataset):
    """Items whose reference id contains "/" can be uploaded and fetched back."""
    ref_id = "test/reference/id"
    item = DatasetItem(
        image_location=TEST_IMG_URLS[0],
        reference_id=ref_id,
        metadata={"test": "metadata"},
    )
    dataset.append([item])
    # Round-trip: the slash in the reference id must survive URL routing.
    dataset.refloc(ref_id)


def test_append_and_export(dataset):
# Dataset upload
url = TEST_IMG_URLS[0]
Expand Down
1 change: 1 addition & 0 deletions tests/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def sort_by_reference_id(items):
)


@pytest.mark.integration
def test_slice_send_to_labeling(dataset):
# Dataset upload
ds_items = []
Expand Down

0 comments on commit a1f87ef

Please sign in to comment.