Skip to content

Commit

Permalink
Drop metadata schemas on tables.
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed May 3, 2024
1 parent 1b84b63 commit 3b26d87
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 19 deletions.
33 changes: 27 additions & 6 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,20 @@ def make_transposed_input_data(self, num_rows):
cols = self.make_input_data(num_rows)
return [
{
col: data[j]
if len(data) == num_rows
else (
bytes(data[cols[f"{col}_offset"][j] : cols[f"{col}_offset"][j + 1]])
if "metadata" in col
else data[cols[f"{col}_offset"][j] : cols[f"{col}_offset"][j + 1]]
col: (
data[j]
if len(data) == num_rows
else (
bytes(
data[
cols[f"{col}_offset"][j] : cols[f"{col}_offset"][j + 1]
]
)
if "metadata" in col
else data[
cols[f"{col}_offset"][j] : cols[f"{col}_offset"][j + 1]
]
)
)
for col, data in cols.items()
if "offset" not in col
Expand Down Expand Up @@ -911,6 +919,19 @@ def test_random_metadata(self):
)
assert metadatas == unpacked_metadatas

def test_drop_metadata(self):
    """
    Check that ``drop_metadata`` makes a table built with metadata equal to
    one built from the same columns without any metadata.
    """
    for row_count in (1, 10, 100):
        columns = self.make_input_data(row_count)
        without_meta = self.table_class()
        with_meta = self.table_class()
        with_meta.set_columns(**columns)
        # Strip both metadata columns so the reference table is built
        # from exactly the same data minus the metadata.
        for dropped in ("metadata", "metadata_offset"):
            columns.pop(dropped)
        without_meta.set_columns(**columns)
        # The tables must differ before the drop and match afterwards.
        assert not without_meta.equals(with_meta)
        with_meta.drop_metadata()
        without_meta.assert_equals(with_meta)

def test_optional_metadata(self):
if not getattr(self, "metadata_mandatory", False):
for num_rows in [0, 10, 100]:
Expand Down
35 changes: 22 additions & 13 deletions python/tskit/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,18 @@ def required_validator(validator, required, instance, schema):
"type": "string",
"pattern": r"^([cbB\?hHiIlLqQfd]|\d*[spx])$",
}
struct_meta_schema["definitions"]["root"]["properties"][
"binaryFormat"
] = struct_meta_schema["properties"]["binaryFormat"]
struct_meta_schema["definitions"]["root"]["properties"]["binaryFormat"] = (
struct_meta_schema["properties"]["binaryFormat"]
)
# arrayLengthFormat matches regex and has default
struct_meta_schema["properties"]["arrayLengthFormat"] = {
"type": "string",
"pattern": r"^[BHILQ]$",
"default": "L",
}
struct_meta_schema["definitions"]["root"]["properties"][
"arrayLengthFormat"
] = struct_meta_schema["properties"]["arrayLengthFormat"]
struct_meta_schema["definitions"]["root"]["properties"]["arrayLengthFormat"] = (
struct_meta_schema["properties"]["arrayLengthFormat"]
)
# index is numeric
struct_meta_schema["properties"]["index"] = {"type": "number"}
struct_meta_schema["definitions"]["root"]["properties"]["index"] = struct_meta_schema[
Expand All @@ -285,14 +285,14 @@ def required_validator(validator, required, instance, schema):
"type": "string",
"default": "utf-8",
}
struct_meta_schema["definitions"]["root"]["properties"][
"stringEncoding"
] = struct_meta_schema["properties"]["stringEncoding"]
struct_meta_schema["definitions"]["root"]["properties"]["stringEncoding"] = (
struct_meta_schema["properties"]["stringEncoding"]
)
# nullTerminated is a boolean
struct_meta_schema["properties"]["nullTerminated"] = {"type": "boolean"}
struct_meta_schema["definitions"]["root"]["properties"][
"nullTerminated"
] = struct_meta_schema["properties"]["nullTerminated"]
struct_meta_schema["definitions"]["root"]["properties"]["nullTerminated"] = (
struct_meta_schema["properties"]["nullTerminated"]
)
# noLengthEncodingExhaustBuffer is a boolean
struct_meta_schema["properties"]["noLengthEncodingExhaustBuffer"] = {"type": "boolean"}
struct_meta_schema["definitions"]["root"]["properties"][
Expand Down Expand Up @@ -722,6 +722,15 @@ def permissive_json():
"""
return MetadataSchema({"codec": "json"})

@staticmethod
def null():
    """
    The null schema, which defines no properties and results in raw bytes
    being returned when accessing metadata columns.
    """
    return MetadataSchema(schema=None)


# Often many replicate tree sequences are processed with identical schemas, so cache them
@functools.lru_cache(maxsize=128)
Expand All @@ -734,7 +743,7 @@ def parse_metadata_schema(encoded_schema: str) -> MetadataSchema:
:return: A subclass of AbstractMetadataSchema.
"""
if encoded_schema == "":
return MetadataSchema(schema=None)
return MetadataSchema.null()
else:
try:
decoded = json.loads(
Expand Down
16 changes: 16 additions & 0 deletions python/tskit/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,22 @@ def getter(d, k):
)
return out

def drop_metadata(self, keep_schema=False):
    """
    Drops all metadata in this table.

    The metadata schema is also reset to the null schema, unless
    ``keep_schema`` is True.

    :param bool keep_schema: True if the current schema should be kept intact.
    """
    if not keep_schema:
        self.metadata_schema = metadata.MetadataSchema.null()
    # Gather every column except the metadata ones and set the table's
    # columns from those alone, leaving the metadata columns out of the call.
    retained = {}
    for name in self.column_names:
        if name.startswith("metadata"):
            continue
        retained[name] = getattr(self, name)
    self.set_columns(**retained)


class IndividualTable(MetadataTable):
"""
Expand Down

0 comments on commit 3b26d87

Please sign in to comment.