Skip to content

Commit

Permalink
feat(datasets): Update __repr__ for PartitionedDataset (#782)
Browse files Browse the repository at this point in the history
* Implemented __repr__

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Added dummy object creation needed for class __repr__

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated RELEASE.md

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Implemented test for repr

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Debug output

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated filepath

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Fixed lint

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Removed redundant function call

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

---------

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>
  • Loading branch information
ElenaKhaustova authored Jul 30, 2024
1 parent 24cd01c commit da66452
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
2 changes: 2 additions & 0 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Upcoming Release
## Major features and improvements
* Improved PartitionedDataset representation when printing.

## Bug fixes and other changes
## Breaking Changes
## Community contributions
Expand Down
16 changes: 16 additions & 0 deletions kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,22 @@ def _describe(self) -> dict[str, Any]:
"dataset_config": clean_dataset_config,
}

def __repr__(self) -> str:
    """Return a printable representation of this partitioned dataset.

    The result is produced by ``self._pretty_repr`` from a mapping with two
    entries: ``"filepath"`` (the ``"path"`` value reported by
    ``self._describe()``) and ``"dataset"`` (the underlying dataset type
    rendered with the ``"dataset_config"`` value reported by
    ``self._describe()``).

    Returns:
        The string built by ``self._pretty_repr``.
    """
    described = self._describe()

    # A throwaway instance of the underlying dataset type is created purely
    # to reach its ``_pretty_repr``. It is constructed from a deep copy of
    # the full ``_dataset_config`` with an empty filepath argument, so the
    # stored config is not modified by the key assignment below. Only the
    # ``dataset_config`` returned by ``_describe()`` ends up in the output.
    dummy_kwargs = deepcopy(self._dataset_config)
    dummy_kwargs[self._filepath_arg] = ""
    dummy_dataset = self._dataset_type(**dummy_kwargs)  # type: ignore

    path_repr = described["path"]
    nested_repr = dummy_dataset._pretty_repr(described["dataset_config"])

    return self._pretty_repr({"filepath": path_repr, "dataset": nested_repr})

def _invalidate_caches(self) -> None:
self._partition_cache.clear()
self._filesystem.invalidate_cache(self._normalized_path)
Expand Down
9 changes: 9 additions & 0 deletions kedro-datasets/tests/partitions/test_partitioned_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ class FakeDataset: # pylint: disable=too-few-public-methods


class TestPartitionedDatasetLocal:
@pytest.mark.parametrize("dataset", ["pandas.ParquetDataset", ParquetDataset])
def test_repr(self, dataset):
    """Check the repr of a ``PartitionedDataset`` built with an empty path.

    Runs once with the dataset given as the string
    ``"pandas.ParquetDataset"`` and once as the ``ParquetDataset`` object
    itself; both cases are expected to produce the same repr.
    """
    expected_repr = (
        "kedro_datasets.partitions.partitioned_dataset.PartitionedDataset(filepath='', "
        "dataset='kedro_datasets.pandas.parquet_dataset.ParquetDataset()')"
    )

    partitioned = PartitionedDataset(path="", dataset=dataset)

    assert repr(partitioned) == expected_repr

@pytest.mark.parametrize("dataset", LOCAL_DATASET_DEFINITION)
@pytest.mark.parametrize(
"suffix,expected_num_parts", [("", 5), (".csv", 3), ("p4", 1)]
Expand Down

0 comments on commit da66452

Please sign in to comment.