Skip to content

Commit

Permalink
Drop metadata schemas on tables.
Browse files Browse the repository at this point in the history
Closes #2944
  • Loading branch information
jeromekelleher authored and mergify[bot] committed May 7, 2024
1 parent d32d63f commit a1aa362
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 1 deletion.
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
using recombination information, leading to unary nodes in many trees and
fewer edges. (:user:`petrelharp`, :user:`hfr1tz3`, :user:`avabamf`, :pr:`2651`)

- Add ``Table.drop_metadata`` to make clearing metadata from tables easy.
(:user:`jeromekelleher`, :pr:`2944`)

**Bugfixes**

- Fix to the folded, expected allele frequency spectrum (i.e.,
Expand Down
41 changes: 41 additions & 0 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,25 @@ def test_random_metadata(self):
)
assert metadatas == unpacked_metadatas

def test_drop_metadata(self):
    """
    Check that ``drop_metadata`` makes a populated table equal to one that
    was built from the same columns without any metadata.
    """
    metadata_required = getattr(self, "metadata_mandatory", False)
    for num_rows in (1, 10, 100):
        columns = self.make_input_data(num_rows)
        with_meta = self.table_class()
        with_meta.set_columns(**columns)

        if metadata_required:
            # Have to do this slightly circular way for the population
            # table because it requires metadata.
            columns["metadata"] = []
            columns["metadata_offset"][:] = 0
        else:
            del columns["metadata"]
            del columns["metadata_offset"]

        without_meta = self.table_class()
        without_meta.set_columns(**columns)

        # The tables must differ before the drop and match after it.
        assert not without_meta.equals(with_meta)
        with_meta.drop_metadata()
        without_meta.assert_equals(with_meta)

def test_optional_metadata(self):
if not getattr(self, "metadata_mandatory", False):
for num_rows in [0, 10, 100]:
Expand Down Expand Up @@ -982,6 +1001,28 @@ def test_set_metadata_schema(self):
):
table.metadata_schema = {}

def test_drop_metadata_with_schema(self):
    """
    After a plain ``drop_metadata()`` the table's schema equals the null
    schema and the row's metadata reads back as empty bytes.
    """
    expected = {"a": "dict"}
    table = self.table_class()
    table.metadata_schema = metadata.MetadataSchema.permissive_json()
    row = self.input_data_for_add_row()
    row["metadata"] = {"a": "dict"}
    table.add_row(**row)
    assert table[0].metadata == expected

    table.drop_metadata()

    assert table.metadata_schema == metadata.MetadataSchema.null()
    assert table[0].metadata == b""

def test_drop_metadata_keep_schema(self):
    """
    With ``keep_schema=True`` the permissive JSON schema stays on the table
    and the row's metadata reads back as an empty dict.
    """
    expected = {"a": "dict"}
    table = self.table_class()
    table.metadata_schema = metadata.MetadataSchema.permissive_json()
    row = self.input_data_for_add_row()
    row["metadata"] = {"a": "dict"}
    table.add_row(**row)
    assert table[0].metadata == expected

    table.drop_metadata(keep_schema=True)

    assert table.metadata_schema == metadata.MetadataSchema.permissive_json()
    assert table[0].metadata == {}

def test_default_metadata_schema(self):
# Default should allow bytes as in pre-existing code
table = self.table_class()
Expand Down
10 changes: 9 additions & 1 deletion python/tskit/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,14 @@ def permissive_json():
"""
return MetadataSchema({"codec": "json"})

@staticmethod
def null():
    """
    The null schema, which defines no properties and results in raw bytes
    being returned when a metadata column is accessed.
    """
    return MetadataSchema(schema=None)


# Often many replicate tree sequences are processed with identical schemas, so cache them
@functools.lru_cache(maxsize=128)
Expand All @@ -734,7 +742,7 @@ def parse_metadata_schema(encoded_schema: str) -> MetadataSchema:
:return: A subclass of AbstractMetadataSchema.
"""
if encoded_schema == "":
return MetadataSchema(schema=None)
return MetadataSchema.null()
else:
try:
decoded = json.loads(
Expand Down
14 changes: 14 additions & 0 deletions python/tskit/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,20 @@ def getter(d, k):
)
return out

def drop_metadata(self, *, keep_schema=False):
    """
    Drops all metadata in this table.

    The columns are read with ``asdict``, the ``metadata`` column is replaced
    by an empty one with every offset set to zero, and the result is written
    back with ``set_columns``. By default the schema is also cleared (set to
    the null schema), except if ``keep_schema`` is True.

    :param bool keep_schema: True if the current schema should be kept intact.
    """
    columns = self.asdict()
    # Empty metadata column: no bytes and every offset zero.
    columns["metadata"] = []
    columns["metadata_offset"][:] = 0
    self.set_columns(**columns)
    if keep_schema:
        return
    self.metadata_schema = metadata.MetadataSchema.null()


class IndividualTable(MetadataTable):
"""
Expand Down

0 comments on commit a1aa362

Please sign in to comment.