Skip to content

Commit

Permalink
Docstrings + duplicate method
Browse files Browse the repository at this point in the history
  • Loading branch information
fealho committed Aug 17, 2023
1 parent b99f1fa commit c9b4fc5
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 13 deletions.
22 changes: 16 additions & 6 deletions sdv/metadata/multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,21 +534,20 @@ def validate(self):
)

def _validate_missing_tables(self, data):
    """Validate that the data contains every table defined in the metadata.

    Args:
        data (dict):
            Mapping keyed by table name. Only the keys are inspected here;
            the table contents are not examined.

    Returns:
        list[str]:
            An empty list when no table is missing, otherwise a single-item
            list holding the error message that names the missing tables.
    """
    missing_tables = set(self.tables) - set(data)
    if not missing_tables:
        return []

    # Keep the historical set-style ``{'a', 'b'}`` rendering, but emit the names
    # in sorted order so the message does not change between runs (raw set
    # iteration order depends on string hash randomisation).
    formatted = ', '.join(repr(name) for name in sorted(missing_tables, key=str))
    return [f'The provided data is missing the tables {{{formatted}}}.']

def _validate_all_tables(self, data, table_synthesizers=None):
def _validate_all_tables(self, data):
"""Validate every table of the data has a valid table/metadata pair."""
errors = []
for table_name, table_data in data.items():
try:
if table_synthesizers:
table_synthesizers[table_name].validate(table_data)
else:
self.tables[table_name].validate_data(table_data)
self.tables[table_name].validate_data(table_data)

except InvalidDataError as error:
error_msg = f"Table: '{table_name}'"
Expand All @@ -566,6 +565,7 @@ def _validate_all_tables(self, data, table_synthesizers=None):
return errors

def _validate_foreign_keys(self, data):
"""Validate all foreign key relationships."""
error_msg = None
errors = []
for relation in self.relationships:
Expand Down Expand Up @@ -597,7 +597,17 @@ def _validate_foreign_keys(self, data):
return [error_msg] if error_msg else []

def validate_data(self, data):
"""Validate the data matches the metadata."""
"""Validate the data matches the metadata.
Checks the following rules:
* all tables of the metadata are present in the data
* every table of the data satisfies its own metadata
* all foreign keys belong to a primary key
Args:
data (pd.DataFrame):
The data to validate.
"""
errors = []
errors += self._validate_missing_tables(data)
errors += self._validate_all_tables(data)
Expand Down
36 changes: 29 additions & 7 deletions sdv/multi_table/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,27 @@ def get_metadata(self):
"""Return the ``MultiTableMetadata`` for this synthesizer."""
return self.metadata

def _get_all_foreign_keys(self, table_name):
foreign_keys = []
for relation in self.metadata.relationships:
if table_name == relation['child_table_name']:
foreign_keys.append(deepcopy(relation['child_foreign_key']))
def _validate_all_tables(self, data):
"""Validate every table of the data has a valid table/metadata pair."""
errors = []
for table_name, table_data in data.items():
try:
self._table_synthesizers[table_name].validate(table_data)

return foreign_keys
except InvalidDataError as error:
error_msg = f"Table: '{table_name}'"
for _error in error.errors:
error_msg += f'\nError: {_error}'

errors.append(error_msg)

except ValueError as error:
errors.append(str(error))

except KeyError:
continue

return errors

def validate(self, data):
"""Validate data.
Expand All @@ -170,7 +184,7 @@ def validate(self, data):
"""
errors = []
errors += self.metadata._validate_missing_tables(data)
errors += self.metadata._validate_all_tables(data, self._table_synthesizers)
errors += self._validate_all_tables(data)
errors += self.metadata._validate_foreign_keys(data)

if errors:
Expand All @@ -180,6 +194,14 @@ def _validate_table_name(self, table_name):
if table_name not in self._table_synthesizers:
raise InvalidDataError([f"Table '{table_name}' is not present in the metadata."])

def _get_all_foreign_keys(self, table_name):
    """Return the foreign keys of every relationship where the table is the child.

    Args:
        table_name (str):
            Name compared against each relationship's ``child_table_name``.

    Returns:
        list:
            Deep copies of the ``child_foreign_key`` values of the matching
            relationships, in the order they appear in
            ``self.metadata.relationships``. Empty when nothing matches.
    """
    # Copies are returned so callers cannot mutate the metadata through them.
    return [
        deepcopy(relation['child_foreign_key'])
        for relation in self.metadata.relationships
        if relation['child_table_name'] == table_name
    ]

def _assign_table_transformers(self, synthesizer, table_name, table_data):
"""Update the ``synthesizer`` to ignore the foreign keys while preprocessing the data."""
synthesizer.auto_assign_transformers(table_data)
Expand Down

0 comments on commit c9b4fc5

Please sign in to comment.