Skip to content

Commit

Permalink
feat: Detect empty packages. Add Format enumeration.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Apr 15, 2024
1 parent 559b812 commit 4116b96
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 11 deletions.
13 changes: 13 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changelog
=========

1.1.10 (2024-04-15)
-------------------

Added
~~~~~

- :class:`ocdskit.util.Format` enumeration

Changed
~~~~~~~

- :meth:`ocdskit.util.detect_format`: Detect empty packages, which set the ``uri``, ``version``, ``publishedDate`` and ``publisher.name`` metadata fields but neither a ``releases`` nor a ``records`` field.

1.1.9 (2024-01-05)
------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
author = "Open Contracting Partnership"

# The short X.Y version
version = "1.1.9"
version = "1.1.10"
# The full version, including alpha/beta/rc tags
release = version

Expand Down
42 changes: 33 additions & 9 deletions ocdskit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,14 @@ def detect_format(path, root_path='', reader=open):
releases_prefix = f'{prefix}releases'
ocid_prefix = f'{prefix}ocid'
tag_item_prefix = f'{prefix}tag.item'
metadata_prefixes = {f'{prefix}{field}' for field in ('publishedDate', 'publisher.name', 'uri', 'version')}

has_records = False
has_releases = False
has_ocid = False
has_tag = False
is_compiled = False
metadata_count = 0
is_array = event == 'start_array'

for prefix, event, value in events:
Expand All @@ -235,25 +237,47 @@ def detect_format(path, root_path='', reader=open):
has_tag = True
if value == 'compiled':
is_compiled = True
elif prefix in metadata_prefixes:
metadata_count += 1
if not prefix and event not in ('end_array', 'end_map', 'map_key'):
return _detect_format_result(True, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled)
return _detect_format_result(
True, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled, metadata_count
)

return _detect_format_result(False, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled)
return _detect_format_result(
False, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled, metadata_count
)


def _detect_format_result(is_concatenated, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled):
from enum import Enum

class Format(str, Enum):
    """
    The data formats that ``_detect_format_result`` can assign as the detected format.

    Members subclass :class:`str`, so they compare equal to their plain-string values
    (for example, ``Format.release == 'release'``).
    """

    compiled_release = 'compiled release'
    empty_package = 'empty package'
    record = 'record'
    record_package = 'record package'
    release = 'release'
    release_package = 'release package'
    versioned_release = 'versioned release'

    def __str__(self):
        """Return the member's plain-string value, like ``'release package'``."""
        # Enum.__str__ returns "Format.release_package". Since Python 3.11, format() and
        # f-strings also use __str__ for (str, Enum) mix-ins, so without this override,
        # interpolating a member into a message would print the qualified member name
        # instead of the human-readable value.
        return self.value


def _detect_format_result(
is_concatenated, is_array, has_records, has_releases, has_ocid, has_tag, is_compiled, metadata_count
):
if has_records:
detected_format = 'record package'
detected_format = Format.record_package
elif has_releases and has_ocid:
detected_format = 'record'
detected_format = Format.record
elif has_releases:
detected_format = 'release package'
detected_format = Format.release_package
elif is_compiled:
detected_format = 'compiled release'
detected_format = Format.compiled_release
elif has_tag:
detected_format = 'release'
detected_format = Format.release
elif has_ocid:
detected_format = 'versioned release'
detected_format = Format.versioned_release
elif metadata_count == 4:
detected_format = Format.empty_package
else:
if is_array:
infix = 'array'
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = ocdskit
version = 1.1.9
version = 1.1.10
author = Open Contracting Partnership
author_email = data@open-contracting.org
license = BSD
Expand Down
1 change: 1 addition & 0 deletions tests/commands/test_detect_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
('release-packages.jsonl', 'concatenated JSON, starting with a JSON array of release packages'),
('detect-format_mixed.json', 'concatenated JSON, starting with release'),
('detect-format_whitespace.json', 'release'),
('detect-format_empty.json', ('empty package')),
])
def test_command(filename, result, capsys, monkeypatch):
expected = f'tests{os.sep}fixtures{os.sep}{filename}: {result}\n'
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/detect-format_empty.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"uri":"https://staging.opentender.eu//protected/master_tender/id/02a99dc5-0d65-4379-b2b7-708d75bc1d1b?format=ocds","version":"1.1","publishedDate":"2024-04-10T08:17:05.643Z","publisher":{"name":"staging.opentender.eu"},"extensions":["https://raw.githubusercontent.com/open-contracting/ocds_lots_extension/v1.1.1/extension.json","https://raw.githubusercontent.com/open-contracting/ocds_bid_extension/v1.1.1/extension.json","https://raw.githubusercontent.com/open-contracting/ocds_requirements_extension/master/extension.json"],"metaData":{"lastModified":"2022-09-10T03:59:29.973141487"}}
1 change: 1 addition & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def test_json_dump(data, expected, tmpdir):
('release-packages.jsonl', ('release package', True, True)),
('detect-format_mixed.json', ('release', True, False)),
('detect-format_whitespace.json', ('release', False, False)),
('detect-format_empty.json', ('empty package', False, False)),
])
def test_detect_format(filename, expected):
result = detect_format(path(filename))
Expand Down

0 comments on commit 4116b96

Please sign in to comment.