From dcc42607e8fbdf990f245e63a3d0b19b3ad6ff13 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Mon, 7 Aug 2023 11:12:34 +0100 Subject: [PATCH 1/6] define detection methods + tests --- sdv/metadata/multi_table.py | 40 ++++++- .../integration/metadata/test_multi_table.py | 103 ++++++++++++++++++ tests/unit/metadata/test_multi_table.py | 95 +++++++++++++++- 3 files changed, 233 insertions(+), 5 deletions(-) diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py index c7838500c..687504c24 100644 --- a/sdv/metadata/multi_table.py +++ b/sdv/metadata/multi_table.py @@ -2,10 +2,13 @@ import json import logging +import os import warnings from collections import defaultdict from copy import deepcopy +import pandas as pd + from sdv.metadata.errors import InvalidMetadataError from sdv.metadata.metadata_upgrader import convert_metadata from sdv.metadata.single_table import SingleTableMetadata @@ -344,6 +347,20 @@ def detect_table_from_dataframe(self, table_name, data): self.tables[table_name] = table self._log_detected_table(table) + def detect_from_dataframes(self, data): + """Detect the metadata for all tables in a dictionary of dataframes. + + Args: + data (dict): + Dictionary of ``pandas.DataFrame`` objects where the keys are the table names and + the values are the dataframes. + """ + if not data or not all(isinstance(df, pd.DataFrame) for df in data.values()): + raise ValueError('The provided dictionary must contain only pandas DataFrame objects') + + for table_name, dataframe in data.items(): + self.detect_table_from_dataframe(table_name, dataframe) + def detect_table_from_csv(self, table_name, filepath): """Detect the metadata for a table from a csv file. @@ -355,11 +372,30 @@ def detect_table_from_csv(self, table_name, filepath): """ self._validate_table_not_detected(table_name) table = SingleTableMetadata() - data = table._load_data_from_csv(filepath) - table._detect_columns(data) + table.detect_from_csv(filepath) self.tables[table_name] = table self._log_detected_table(table) + def detect_from_csvs(self, folder_name): + """Detect the metadata for all tables in a folder of csv files. + + Args: + folder_name (str): + Name of the folder to detect the metadata from. + + Raises: + ValueError: If no CSV files are detected in the folder. + """ + csv_files = [filename for filename in os.listdir(folder_name) if filename.endswith('.csv')] + + if not csv_files: + raise ValueError(f"No CSV files detected in the folder '{folder_name}'") + + for filename in csv_files: + table_name = filename[:-4] # Removing the .csv extension + csv_file = os.path.join(folder_name, filename) + self.detect_table_from_csv(table_name, csv_file) + def set_primary_key(self, table_name, column_name): """Set the primary key of a table. 
diff --git a/tests/integration/metadata/test_multi_table.py b/tests/integration/metadata/test_multi_table.py index c86bedc0f..487a60c7e 100644 --- a/tests/integration/metadata/test_multi_table.py +++ b/tests/integration/metadata/test_multi_table.py @@ -1,7 +1,10 @@ """Integration tests for Multi Table Metadata.""" import json +import os +import tempfile +from sdv.datasets.demo import download_demo from sdv.metadata import MultiTableMetadata @@ -132,3 +135,103 @@ def test_upgrade_metadata(tmp_path): assert new_metadata['tables'] == expected_metadata['tables'] for relationship in new_metadata['relationships']: assert relationship in expected_metadata['relationships'] + + +def test_detect_from_dataframes(): + """Test the ``detect_from_dataframes`` method.""" + # Setup + real_data, _ = download_demo( + modality='multi_table', + dataset_name='fake_hotels' + ) + + metadata = MultiTableMetadata() + + # Run + metadata.detect_from_dataframes(real_data) + + # Assert + expected_metadata = { + 'tables': { + 'hotels': { + 'columns': { + 'hotel_id': {'sdtype': 'categorical'}, + 'city': {'sdtype': 'categorical'}, + 'state': {'sdtype': 'categorical'}, + 'rating': {'sdtype': 'numerical'}, + 'classification': {'sdtype': 'categorical'} + } + }, + 'guests': { + 'columns': { + 'guest_email': {'sdtype': 'categorical'}, + 'hotel_id': {'sdtype': 'categorical'}, + 'has_rewards': {'sdtype': 'boolean'}, + 'room_type': {'sdtype': 'categorical'}, + 'amenities_fee': {'sdtype': 'numerical'}, + 'checkin_date': {'sdtype': 'categorical'}, + 'checkout_date': {'sdtype': 'categorical'}, + 'room_rate': {'sdtype': 'numerical'}, + 'billing_address': {'sdtype': 'categorical'}, + 'credit_card_number': {'sdtype': 'numerical'} + } + } + }, + 'relationships': [], + 'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1' + } + + assert metadata.to_dict() == expected_metadata + + +def test_detect_from_csvs(): + """Test the ``detect_from_csvs`` method.""" + # Setup + real_data, _ = download_demo( + modality='multi_table', + dataset_name='fake_hotels' + ) + + metadata = MultiTableMetadata() + + with tempfile.TemporaryDirectory() as temp_dir: + # Save the dataframes as CSV files in the temporary directory + for table_name, dataframe in real_data.items(): + csv_path = os.path.join(temp_dir, f'{table_name}.csv') + dataframe.to_csv(csv_path, index=False) + + # Run + metadata.detect_from_csvs(folder_name=temp_dir) + + # Assert + expected_metadata = { + 'tables': { + 'hotels': { + 'columns': { + 'hotel_id': {'sdtype': 'categorical'}, + 'city': {'sdtype': 'categorical'}, + 'state': {'sdtype': 'categorical'}, + 'rating': {'sdtype': 'numerical'}, + 'classification': {'sdtype': 'categorical'} + } + }, + 'guests': { + 'columns': { + 'guest_email': {'sdtype': 'categorical'}, + 'hotel_id': {'sdtype': 'categorical'}, + 'has_rewards': {'sdtype': 'boolean'}, + 'room_type': {'sdtype': 'categorical'}, + 'amenities_fee': {'sdtype': 'numerical'}, + 'checkin_date': {'sdtype': 'categorical'}, + 'checkout_date': {'sdtype': 'categorical'}, + 'room_rate': {'sdtype': 'numerical'}, + 'billing_address': {'sdtype': 'categorical'}, + 'credit_card_number': {'sdtype': 'numerical'} + } + } + }, + 'relationships': [], + 'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1' + } + + assert metadata.to_dict() == expected_metadata diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py index c0ece36cb..0cf8a5545 100644 --- a/tests/unit/metadata/test_multi_table.py +++ b/tests/unit/metadata/test_multi_table.py @@ -1604,7 +1604,7 @@ def 
test_detect_table_from_csv(self, single_table_mock, log_mock): # Setup metadata = MultiTableMetadata() fake_data = Mock() - single_table_mock.return_value._load_data_from_csv.return_value = fake_data + single_table_mock.return_value.detect_from_csv.return_value = fake_data single_table_mock.return_value.to_dict.return_value = { 'columns': {'a': {'sdtype': 'numerical'}} } @@ -1613,8 +1613,7 @@ def test_detect_table_from_csv(self, single_table_mock, log_mock): metadata.detect_table_from_csv('table', 'path.csv') # Assert - single_table_mock.return_value._load_data_from_csv.assert_called_once_with('path.csv') - single_table_mock.return_value._detect_columns.assert_called_once_with(fake_data) + single_table_mock.return_value.detect_from_csv.assert_called_once_with('path.csv') assert metadata.tables == {'table': single_table_mock.return_value} expected_log_calls = call( @@ -1656,6 +1655,54 @@ def test_detect_table_from_csv_table_already_exists(self): with pytest.raises(InvalidMetadataError, match=error_message): metadata.detect_table_from_csv('table', 'path.csv') + def test_detect_from_csvs(self, tmp_path): + """Test the ``detect_from_csvs`` method. + + The method should call ``detect_table_from_csv`` for each csv in the folder. + """ + # Setup + instance = MultiTableMetadata() + instance.detect_table_from_csv = Mock() + + data1 = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + data2 = pd.DataFrame({'col1': [5, 6], 'col2': [7, 8]}) + + filepath1 = tmp_path / 'table1.csv' + filepath2 = tmp_path / 'table2.csv' + data1.to_csv(filepath1, index=False) + data2.to_csv(filepath2, index=False) + + json_filepath = tmp_path / 'not_csv.json' + with open(json_filepath, 'w') as json_file: + json_file.write('{"key": "value"}') + + # Run + instance.detect_from_csvs(tmp_path) + + # Assert + expected_calls = [ + call('table1', str(filepath1)), + call('table2', str(filepath2)) + ] + + instance.detect_table_from_csv.assert_has_calls(expected_calls, any_order=True) + assert instance.detect_table_from_csv.call_count == 2 + + def test_detect_from_csvs_no_csv(self, tmp_path): + """Test the ``detect_from_csvs`` method with no csv file in the folder.""" + # Setup + instance = MultiTableMetadata() + + json_filepath = tmp_path / 'not_csv.json' + with open(json_filepath, 'w') as json_file: + json_file.write('{"key": "value"}') + + # Run and Assert + expected_message = re.escape("No CSV files detected in the folder '{}'".format(tmp_path)) + + with pytest.raises(ValueError, match=expected_message): + instance.detect_from_csvs(tmp_path) + @patch('sdv.metadata.multi_table.LOGGER') @patch('sdv.metadata.multi_table.SingleTableMetadata') def test_detect_table_from_dataframe(self, single_table_mock, log_mock): @@ -1723,6 +1770,48 @@ def test_detect_table_from_dataframe_table_already_exists(self): with pytest.raises(InvalidMetadataError, match=error_message): metadata.detect_table_from_dataframe('table', pd.DataFrame()) + def test_detect_from_dataframes(self): + """Test ``detect_from_dataframes``. + + Expected to call ``detect_table_from_dataframe`` for each table name and dataframe + in the input. 
+ """ + # Setup + metadata = MultiTableMetadata() + metadata.detect_table_from_dataframe = Mock() + + guests_table = pd.DataFrame() + hotels_table = pd.DataFrame() + + # Run + metadata.detect_from_dataframes( + data={ + 'guests': guests_table, + 'hotels': hotels_table + } + ) + + # Assert + metadata.detect_table_from_dataframe.assert_any_call('guests', guests_table) + metadata.detect_table_from_dataframe.assert_any_call('hotels', hotels_table) + + def test_detect_from_dataframes_no_dataframes(self): + """Test ``detect_from_dataframes`` with no dataframes in the input. + + Expected to raise an error. + """ + # Setup + metadata = MultiTableMetadata() + + # Run and Assert + expected_message = 'The provided dictionary must contain only pandas DataFrame objects' + + with pytest.raises(ValueError, match=expected_message): + metadata.detect_from_dataframes(data={}) + + with pytest.raises(ValueError, match=expected_message): + metadata.detect_from_dataframes(data={'a': 1}) + def test__validate_table_exists(self): """Test ``_validate_table_exists``. From c18bc4a8b3618a4cca95cefbb6e34daabb366157 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 10 Aug 2023 11:20:16 +0100 Subject: [PATCH 2/6] address comments --- sdv/metadata/multi_table.py | 16 +++++++++------- tests/unit/metadata/test_multi_table.py | 11 ++++++++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py index 687504c24..55bac2a2d 100644 --- a/sdv/metadata/multi_table.py +++ b/sdv/metadata/multi_table.py @@ -352,11 +352,10 @@ def detect_from_dataframes(self, data): Args: data (dict): - Dictionary of ``pandas.DataFrame`` objects where the keys are the table names and - the values are the dataframes. + Dictionary of table names to dataframes. """ if not data or not all(isinstance(df, pd.DataFrame) for df in data.values()): - raise ValueError('The provided dictionary must contain only pandas DataFrame objects') + raise ValueError('The provided dictionary must contain only pandas DataFrame objects.') for table_name, dataframe in data.items(): self.detect_table_from_dataframe(table_name, dataframe) @@ -383,13 +382,16 @@ def detect_from_csvs(self, folder_name): folder_name (str): Name of the folder to detect the metadata from. - Raises: - ValueError: If no CSV files are detected in the folder. 
""" - csv_files = [filename for filename in os.listdir(folder_name) if filename.endswith('.csv')] + if os.path.exists(folder_name) and os.path.isdir(folder_name): + csv_files = [ + filename for filename in os.listdir(folder_name) if filename.endswith('.csv') + ] + else: + raise ValueError(f"The folder '{folder_name}' does not exist.") if not csv_files: - raise ValueError(f"No CSV files detected in the folder '{folder_name}'") + raise ValueError(f"No CSV files detected in the folder '{folder_name}'.") for filename in csv_files: table_name = filename[:-4] # Removing the .csv extension diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py index 0cf8a5545..04fa418c9 100644 --- a/tests/unit/metadata/test_multi_table.py +++ b/tests/unit/metadata/test_multi_table.py @@ -1698,11 +1698,16 @@ def test_detect_from_csvs_no_csv(self, tmp_path): json_file.write('{"key": "value"}') # Run and Assert - expected_message = re.escape("No CSV files detected in the folder '{}'".format(tmp_path)) - + expected_message = re.escape("No CSV files detected in the folder '{}'.".format(tmp_path)) with pytest.raises(ValueError, match=expected_message): instance.detect_from_csvs(tmp_path) + expected_message_folder = re.escape( + "The folder '{}' does not exist.".format('not_a_folder') + ) + with pytest.raises(ValueError, match=expected_message_folder): + instance.detect_from_csvs('not_a_folder') + @patch('sdv.metadata.multi_table.LOGGER') @patch('sdv.metadata.multi_table.SingleTableMetadata') def test_detect_table_from_dataframe(self, single_table_mock, log_mock): @@ -1804,7 +1809,7 @@ def test_detect_from_dataframes_no_dataframes(self): metadata = MultiTableMetadata() # Run and Assert - expected_message = 'The provided dictionary must contain only pandas DataFrame objects' + expected_message = 'The provided dictionary must contain only pandas DataFrame objects.' with pytest.raises(ValueError, match=expected_message): metadata.detect_from_dataframes(data={}) From f5bec5edf51b9f2686b91135a363f4230b83d46c Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 10 Aug 2023 14:30:23 +0100 Subject: [PATCH 3/6] use Pathlib --- sdv/metadata/multi_table.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py index 55bac2a2d..446ed9a27 100644 --- a/sdv/metadata/multi_table.py +++ b/sdv/metadata/multi_table.py @@ -2,10 +2,10 @@ import json import logging -import os import warnings from collections import defaultdict from copy import deepcopy +from pathlib import Path import pandas as pd @@ -383,20 +383,19 @@ def detect_from_csvs(self, folder_name): Name of the folder to detect the metadata from. 
""" - if os.path.exists(folder_name) and os.path.isdir(folder_name): - csv_files = [ - filename for filename in os.listdir(folder_name) if filename.endswith('.csv') - ] + folder_path = Path(folder_name) + + if folder_path.is_dir(): + csv_files = list(folder_path.rglob('*.csv')) else: raise ValueError(f"The folder '{folder_name}' does not exist.") if not csv_files: raise ValueError(f"No CSV files detected in the folder '{folder_name}'.") - for filename in csv_files: - table_name = filename[:-4] # Removing the .csv extension - csv_file = os.path.join(folder_name, filename) - self.detect_table_from_csv(table_name, csv_file) + for csv_file in csv_files: + table_name = csv_file.stem + self.detect_table_from_csv(table_name, str(csv_file)) def set_primary_key(self, table_name, column_name): """Set the primary key of a table. From 794926bb6af8e2584182dd0a410909c306a141a9 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Fri, 11 Aug 2023 11:50:51 +0100 Subject: [PATCH 4/6] modify test to use tmp_path --- .../integration/metadata/test_multi_table.py | 79 +++++++++---------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/tests/integration/metadata/test_multi_table.py b/tests/integration/metadata/test_multi_table.py index 487a60c7e..9aad071c7 100644 --- a/tests/integration/metadata/test_multi_table.py +++ b/tests/integration/metadata/test_multi_table.py @@ -1,8 +1,7 @@ """Integration tests for Multi Table Metadata.""" import json -import os -import tempfile +import pytest from sdv.datasets.demo import download_demo from sdv.metadata import MultiTableMetadata @@ -184,7 +183,7 @@ def test_detect_from_dataframes(): assert metadata.to_dict() == expected_metadata -def test_detect_from_csvs(): +def test_detect_from_csvs(tmp_path): """Test the ``detect_from_csvs`` method.""" # Setup real_data, _ = download_demo( @@ -194,44 +193,42 @@ def test_detect_from_csvs(): metadata = MultiTableMetadata() - with tempfile.TemporaryDirectory() as temp_dir: - # Save the dataframes as CSV files in the temporary directory - for table_name, dataframe in real_data.items(): - csv_path = os.path.join(temp_dir, f'{table_name}.csv') - dataframe.to_csv(csv_path, index=False) - - # Run - metadata.detect_from_csvs(folder_name=temp_dir) - - # Assert - expected_metadata = { - 'tables': { - 'hotels': { - 'columns': { - 'hotel_id': {'sdtype': 'categorical'}, - 'city': {'sdtype': 'categorical'}, - 'state': {'sdtype': 'categorical'}, - 'rating': {'sdtype': 'numerical'}, - 'classification': {'sdtype': 'categorical'} - } - }, - 'guests': { - 'columns': { - 'guest_email': {'sdtype': 'categorical'}, - 'hotel_id': {'sdtype': 'categorical'}, - 'has_rewards': {'sdtype': 'boolean'}, - 'room_type': {'sdtype': 'categorical'}, - 'amenities_fee': {'sdtype': 'numerical'}, - 'checkin_date': {'sdtype': 'categorical'}, - 'checkout_date': {'sdtype': 'categorical'}, - 'room_rate': {'sdtype': 'numerical'}, - 'billing_address': {'sdtype': 'categorical'}, - 'credit_card_number': {'sdtype': 'numerical'} - } + for table_name, dataframe in real_data.items(): + csv_path = tmp_path / f"{table_name}.csv" + dataframe.to_csv(csv_path, index=False) + + # Run + metadata.detect_from_csvs(folder_name=tmp_path) + + # Assert + expected_metadata = { + 'tables': { + 'hotels': { + 'columns': { + 'hotel_id': {'sdtype': 'categorical'}, + 'city': {'sdtype': 'categorical'}, + 'state': {'sdtype': 'categorical'}, + 'rating': {'sdtype': 'numerical'}, + 'classification': {'sdtype': 'categorical'} } }, - 'relationships': [], - 'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1' - } 
+ 'guests': { + 'columns': { + 'guest_email': {'sdtype': 'categorical'}, + 'hotel_id': {'sdtype': 'categorical'}, + 'has_rewards': {'sdtype': 'boolean'}, + 'room_type': {'sdtype': 'categorical'}, + 'amenities_fee': {'sdtype': 'numerical'}, + 'checkin_date': {'sdtype': 'categorical'}, + 'checkout_date': {'sdtype': 'categorical'}, + 'room_rate': {'sdtype': 'numerical'}, + 'billing_address': {'sdtype': 'categorical'}, + 'credit_card_number': {'sdtype': 'numerical'} + } + } + }, + 'relationships': [], + 'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1' + } - assert metadata.to_dict() == expected_metadata + assert metadata.to_dict() == expected_metadata From b1cacea5bcdccba246f1fac9ca5e35584a1d9c4f Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Fri, 11 Aug 2023 12:13:08 +0100 Subject: [PATCH 5/6] test detect_table_from_csv --- .../integration/metadata/test_multi_table.py | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/integration/metadata/test_multi_table.py b/tests/integration/metadata/test_multi_table.py index 9aad071c7..4546c31b7 100644 --- a/tests/integration/metadata/test_multi_table.py +++ b/tests/integration/metadata/test_multi_table.py @@ -1,7 +1,6 @@ """Integration tests for Multi Table Metadata.""" import json -import pytest from sdv.datasets.demo import download_demo from sdv.metadata import MultiTableMetadata @@ -194,7 +193,7 @@ def test_detect_from_csvs(tmp_path): metadata = MultiTableMetadata() for table_name, dataframe in real_data.items(): - csv_path = tmp_path / f"{table_name}.csv" + csv_path = tmp_path / f'{table_name}.csv' dataframe.to_csv(csv_path, index=False) # Run @@ -232,3 +231,40 @@ def test_detect_from_csvs(tmp_path): } assert metadata.to_dict() == expected_metadata + + +def test_detect_table_from_csv(tmp_path): + """Test the ``detect_table_from_csv`` method.""" + # Setup + real_data, _ = download_demo( + modality='multi_table', + dataset_name='fake_hotels' + ) + + metadata = MultiTableMetadata() + + for table_name, dataframe in real_data.items(): + csv_path = tmp_path / f'{table_name}.csv' + dataframe.to_csv(csv_path, index=False) + + # Run + metadata.detect_table_from_csv('hotels', tmp_path / 'hotels.csv') + + # Assert + expected_metadata = { + 'tables': { + 'hotels': { + 'columns': { + 'hotel_id': {'sdtype': 'categorical'}, + 'city': {'sdtype': 'categorical'}, + 'state': {'sdtype': 'categorical'}, + 'rating': {'sdtype': 'numerical'}, + 'classification': {'sdtype': 'categorical'} + } + } + }, + 'relationships': [], + 'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1' + } + + assert metadata.to_dict() == expected_metadata From ab8352eb56916db6eb0fa6a47810b64b44c4fedd Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Fri, 11 Aug 2023 16:56:04 +0100 Subject: [PATCH 6/6] use load_data_from_csv --- sdv/metadata/multi_table.py | 5 +++-- tests/unit/metadata/test_multi_table.py | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py index 446ed9a27..28a066872 100644 --- a/sdv/metadata/multi_table.py +++ b/sdv/metadata/multi_table.py @@ -15,7 +15,7 @@ from sdv.metadata.utils import read_json, validate_file_does_not_exist from sdv.metadata.visualization import ( create_columns_node, create_summarized_columns_node, visualize_graph) -from sdv.utils import cast_to_iterable +from sdv.utils import cast_to_iterable, load_data_from_csv LOGGER = logging.getLogger(__name__) @@ -371,7 +371,8 @@ def detect_table_from_csv(self, table_name, filepath): """ 
self._validate_table_not_detected(table_name) table = SingleTableMetadata() - table.detect_from_csv(filepath) + data = load_data_from_csv(filepath) + table._detect_columns(data) self.tables[table_name] = table self._log_detected_table(table) diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py index 04fa418c9..8c60b46ab 100644 --- a/tests/unit/metadata/test_multi_table.py +++ b/tests/unit/metadata/test_multi_table.py @@ -1589,7 +1589,8 @@ def test_update_column_table_does_not_exist(self): @patch('sdv.metadata.multi_table.LOGGER') @patch('sdv.metadata.multi_table.SingleTableMetadata') - def test_detect_table_from_csv(self, single_table_mock, log_mock): + @patch('sdv.metadata.multi_table.load_data_from_csv') + def test_detect_table_from_csv(self, load_csv_mock, single_table_mock, log_mock): """Test the ``detect_table_from_csv`` method. If the table does not already exist, a ``SingleTableMetadata`` instance @@ -1604,7 +1605,7 @@ def test_detect_table_from_csv(self, single_table_mock, log_mock): # Setup metadata = MultiTableMetadata() fake_data = Mock() - single_table_mock.return_value.detect_from_csv.return_value = fake_data + load_csv_mock.return_value = fake_data single_table_mock.return_value.to_dict.return_value = { 'columns': {'a': {'sdtype': 'numerical'}} } @@ -1613,7 +1614,8 @@ def test_detect_table_from_csv(self, single_table_mock, log_mock): metadata.detect_table_from_csv('table', 'path.csv') # Assert - single_table_mock.return_value.detect_from_csv.assert_called_once_with('path.csv') + load_csv_mock.assert_called_once_with('path.csv') + single_table_mock.return_value._detect_columns.assert_called_once_with(fake_data) assert metadata.tables == {'table': single_table_mock.return_value} expected_log_calls = call(
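
Note for reviewers: below is a minimal usage sketch of the two detection entry points this series adds, based only on the signatures, arguments, and error handling visible in the final state of the patches above (the ``download_demo`` call mirrors the integration tests; the local ``'csv_folder'`` path is a hypothetical example, not something referenced in the patches).

    from sdv.datasets.demo import download_demo
    from sdv.metadata import MultiTableMetadata

    # Detect metadata for every table from in-memory dataframes.
    # Raises ValueError if the dict is empty or contains non-DataFrame values.
    real_data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
    metadata = MultiTableMetadata()
    metadata.detect_from_dataframes(data=real_data)

    # Or detect from a folder of CSV files; each '*.csv' becomes a table named
    # after the file stem. Raises ValueError if the folder does not exist or
    # contains no CSV files.
    csv_metadata = MultiTableMetadata()
    csv_metadata.detect_from_csvs(folder_name='csv_folder')  # hypothetical folder

    print(metadata.to_dict())

Both methods delegate to the existing per-table detection (``detect_table_from_dataframe`` / ``detect_table_from_csv``), so relationships are left empty and must still be added separately.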