From 76a9ce1e27a21359e7b0ef9e6cb865f8d95fc298 Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:54:24 +0100 Subject: [PATCH] refactor: Remove `toolz` dependency (#3426) --- altair/__init__.py | 2 - altair/_magics.py | 5 +- altair/utils/_importers.py | 3 +- altair/utils/_vegafusion_data.py | 53 ++++++-- altair/utils/data.py | 227 ++++++++++++++++++++----------- altair/utils/plugin_registry.py | 21 +-- altair/vegalite/data.py | 30 ++-- altair/vegalite/v5/__init__.py | 2 - altair/vegalite/v5/api.py | 12 +- altair/vegalite/v5/data.py | 4 - pyproject.toml | 1 - tests/utils/test_data.py | 40 +++--- 12 files changed, 252 insertions(+), 148 deletions(-) diff --git a/altair/__init__.py b/altair/__init__.py index 55feb3cdb..2f9e4acad 100644 --- a/altair/__init__.py +++ b/altair/__init__.py @@ -572,7 +572,6 @@ "concat", "condition", "core", - "curry", "data", "data_transformers", "datum", @@ -591,7 +590,6 @@ "overload", "param", "parse_shorthand", - "pipe", "renderers", "repeat", "sample", diff --git a/altair/_magics.py b/altair/_magics.py index bac190aa3..246ea2a1f 100644 --- a/altair/_magics.py +++ b/altair/_magics.py @@ -10,7 +10,6 @@ import IPython from IPython.core import magic_arguments import pandas as pd -from toolz import curried from altair.vegalite import v5 as vegalite_v5 @@ -41,7 +40,9 @@ def _prepare_data(data, data_transformers): if data is None or isinstance(data, dict): return data elif isinstance(data, pd.DataFrame): - return curried.pipe(data, data_transformers.get()) + if func := data_transformers.get(): + data = func(data) + return data elif isinstance(data, str): return {"url": data} else: diff --git a/altair/utils/_importers.py b/altair/utils/_importers.py index b7fa8a958..9f7f567e4 100644 --- a/altair/utils/_importers.py +++ b/altair/utils/_importers.py @@ -1,6 +1,7 @@ +from importlib.metadata import version as importlib_version from types import ModuleType + from packaging.version import Version -from importlib.metadata import version as importlib_version def import_vegafusion() -> ModuleType: diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index ce30e8d6d..cfd8a5760 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -1,8 +1,9 @@ -from toolz import curried +from __future__ import annotations import uuid from weakref import WeakValueDictionary - from typing import ( + Any, + Optional, Union, Dict, Set, @@ -10,14 +11,22 @@ TypedDict, Final, TYPE_CHECKING, + overload, + Callable, ) from altair.utils._importers import import_vegafusion from altair.utils.core import DataFrameLike -from altair.utils.data import DataType, ToValuesReturnType, MaxRowsError +from altair.utils.data import ( + DataType, + ToValuesReturnType, + MaxRowsError, + SupportsGeoInterface, +) from altair.vegalite.data import default_data_transformer if TYPE_CHECKING: + import pandas as pd from vegafusion.runtime import ChartState # type: ignore # Temporary storage for dataframes that have been extracted @@ -36,21 +45,41 @@ class _ToVegaFusionReturnUrlDict(TypedDict): url: str -@curried.curry +_VegaFusionReturnType = Union[_ToVegaFusionReturnUrlDict, ToValuesReturnType] + + +@overload +def vegafusion_data_transformer( + data: None = ..., max_rows: int = ... +) -> Callable[..., Any]: ... + + +@overload +def vegafusion_data_transformer( + data: DataFrameLike, max_rows: int +) -> ToValuesReturnType: ... + + +@overload def vegafusion_data_transformer( - data: DataType, max_rows: int = 100000 -) -> Union[_ToVegaFusionReturnUrlDict, ToValuesReturnType]: + data: Union[dict, pd.DataFrame, SupportsGeoInterface], max_rows: int +) -> _VegaFusionReturnType: ... + + +def vegafusion_data_transformer( + data: Optional[DataType] = None, max_rows: int = 100000 +) -> Union[Callable[..., Any], _VegaFusionReturnType]: """VegaFusion Data Transformer""" - if hasattr(data, "__geo_interface__"): - # Use default transformer for geo interface objects - # # (e.g. a geopandas GeoDataFrame) - return default_data_transformer(data) - elif isinstance(data, DataFrameLike): + if data is None: + return vegafusion_data_transformer + elif isinstance(data, DataFrameLike) and not isinstance(data, SupportsGeoInterface): table_name = f"table_{uuid.uuid4()}".replace("-", "_") extracted_inline_tables[table_name] = data return {"url": VEGAFUSION_PREFIX + table_name} else: - # Use default transformer if we don't recognize data type + # Use default transformer for geo interface objects + # # (e.g. a geopandas GeoDataFrame) + # Or if we don't recognize data type return default_data_transformer(data) diff --git a/altair/utils/data.py b/altair/utils/data.py index 871b43092..41b5e71aa 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -1,28 +1,43 @@ +from functools import partial import json -import os import random import hashlib -import warnings -from typing import Union, MutableMapping, Optional, Dict, Sequence, TYPE_CHECKING, List +import sys +from pathlib import Path +from typing import ( + Union, + MutableMapping, + Optional, + Dict, + Sequence, + TYPE_CHECKING, + List, + TypeVar, + Protocol, + TypedDict, + Literal, + overload, + runtime_checkable, + Any, +) import pandas as pd -from toolz import curried -from typing import TypeVar from ._importers import import_pyarrow_interchange from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike from .core import sanitize_geo_interface -from .deprecation import AltairDeprecationWarning from .plugin_registry import PluginRegistry - -from typing import Protocol, TypedDict, Literal - +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs if TYPE_CHECKING: import pyarrow.lib +@runtime_checkable class SupportsGeoInterface(Protocol): __geo_interface__: MutableMapping @@ -32,6 +47,13 @@ class SupportsGeoInterface(Protocol): VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] +SampleReturnType = Optional[ + Union[pd.DataFrame, Dict[str, Sequence], "pyarrow.lib.Table"] +] + + +def is_data_type(obj: Any) -> TypeIs[DataType]: + return isinstance(obj, (dict, pd.DataFrame, DataFrameLike, SupportsGeoInterface)) # ============================================================================== @@ -46,8 +68,13 @@ class SupportsGeoInterface(Protocol): # form. # ============================================================================== class DataTransformerType(Protocol): - def __call__(self, data: DataType, **kwargs) -> VegaLiteDataDict: - pass + @overload + def __call__(self, data: None = None, **kwargs) -> "DataTransformerType": ... + @overload + def __call__(self, data: DataType, **kwargs) -> VegaLiteDataDict: ... + def __call__( + self, data: Optional[DataType] = None, **kwargs + ) -> Union["DataTransformerType", VegaLiteDataDict]: ... class DataTransformerRegistry(PluginRegistry[DataTransformerType]): @@ -69,12 +96,19 @@ class MaxRowsError(Exception): pass -@curried.curry -def limit_rows(data: TDataType, max_rows: Optional[int] = 5000) -> TDataType: +@overload +def limit_rows(data: None = ..., max_rows: Optional[int] = ...) -> partial: ... +@overload +def limit_rows(data: DataType, max_rows: Optional[int] = ...) -> DataType: ... +def limit_rows( + data: Optional[DataType] = None, max_rows: Optional[int] = 5000 +) -> Union[partial, DataType]: """Raise MaxRowsError if the data model has more than max_rows. If max_rows is None, then do not perform any check. """ + if data is None: + return partial(limit_rows, max_rows=max_rows) check_data_type(data) def raise_max_rows_error(): @@ -91,7 +125,7 @@ def raise_max_rows_error(): "on how to plot large datasets." ) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if data.__geo_interface__["type"] == "FeatureCollection": values = data.__geo_interface__["features"] else: @@ -102,9 +136,7 @@ def raise_max_rows_error(): if "values" in data: values = data["values"] else: - # mypy gets confused as it doesn't see Dict[Any, Any] - # as equivalent to TDataType - return data # type: ignore[return-value] + return data elif isinstance(data, DataFrameLike): pa_table = arrow_table_from_dfi_dataframe(data) if max_rows is not None and pa_table.num_rows > max_rows: @@ -119,11 +151,22 @@ def raise_max_rows_error(): return data -@curried.curry +@overload def sample( - data: DataType, n: Optional[int] = None, frac: Optional[float] = None -) -> Optional[Union[pd.DataFrame, Dict[str, Sequence], "pyarrow.lib.Table"]]: + data: None = ..., n: Optional[int] = ..., frac: Optional[float] = ... +) -> partial: ... +@overload +def sample( + data: DataType, n: Optional[int], frac: Optional[float] +) -> SampleReturnType: ... +def sample( + data: Optional[DataType] = None, + n: Optional[int] = None, + frac: Optional[float] = None, +) -> Union[partial, SampleReturnType]: """Reduce the size of the data model by sampling without replacement.""" + if data is None: + return partial(sample, n=n, frac=frac) check_data_type(data) if isinstance(data, pd.DataFrame): return data.sample(n=n, frac=frac) @@ -157,65 +200,115 @@ def sample( return None -class _JsonFormatDict(TypedDict): - type: Literal["json"] +_FormatType = Literal["csv", "json"] -class _CsvFormatDict(TypedDict): - type: Literal["csv"] +class _FormatDict(TypedDict): + type: _FormatType -class _ToJsonReturnUrlDict(TypedDict): +class _ToFormatReturnUrlDict(TypedDict): url: str - format: _JsonFormatDict + format: _FormatDict -class _ToCsvReturnUrlDict(TypedDict): - url: str - format: _CsvFormatDict +@overload +def to_json( + data: None = ..., + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> partial: ... -@curried.curry +@overload def to_json( data: DataType, + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> _ToFormatReturnUrlDict: ... + + +def to_json( + data: Optional[DataType] = None, prefix: str = "altair-data", extension: str = "json", filename: str = "{prefix}-{hash}.{extension}", urlpath: str = "", -) -> _ToJsonReturnUrlDict: +) -> Union[partial, _ToFormatReturnUrlDict]: """ Write the data model to a .json file and return a url based data model. """ - data_json = _data_to_json_string(data) - data_hash = _compute_data_hash(data_json) - filename = filename.format(prefix=prefix, hash=data_hash, extension=extension) - with open(filename, "w") as f: - f.write(data_json) - return {"url": os.path.join(urlpath, filename), "format": {"type": "json"}} + kwds = _to_text_kwds(prefix, extension, filename, urlpath) + if data is None: + return partial(to_json, **kwds) + else: + data_str = _data_to_json_string(data) + return _to_text(data_str, **kwds, format=_FormatDict(type="json")) + + +@overload +def to_csv( + data: None = ..., + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> partial: ... -@curried.curry +@overload def to_csv( data: Union[dict, pd.DataFrame, DataFrameLike], + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> _ToFormatReturnUrlDict: ... + + +def to_csv( + data: Optional[Union[dict, pd.DataFrame, DataFrameLike]] = None, prefix: str = "altair-data", extension: str = "csv", filename: str = "{prefix}-{hash}.{extension}", urlpath: str = "", -) -> _ToCsvReturnUrlDict: +) -> Union[partial, _ToFormatReturnUrlDict]: """Write the data model to a .csv file and return a url based data model.""" - data_csv = _data_to_csv_string(data) - data_hash = _compute_data_hash(data_csv) + kwds = _to_text_kwds(prefix, extension, filename, urlpath) + if data is None: + return partial(to_csv, **kwds) + else: + data_str = _data_to_csv_string(data) + return _to_text(data_str, **kwds, format=_FormatDict(type="csv")) + + +def _to_text( + data: str, + prefix: str, + extension: str, + filename: str, + urlpath: str, + format: _FormatDict, +) -> _ToFormatReturnUrlDict: + data_hash = _compute_data_hash(data) filename = filename.format(prefix=prefix, hash=data_hash, extension=extension) - with open(filename, "w") as f: - f.write(data_csv) - return {"url": os.path.join(urlpath, filename), "format": {"type": "csv"}} + Path(filename).write_text(data) + url = str(Path(urlpath, filename)) + return _ToFormatReturnUrlDict({"url": url, "format": format}) + + +def _to_text_kwds(prefix: str, extension: str, filename: str, urlpath: str, /) -> Dict[str, str]: # fmt: skip + return {"prefix": prefix, "extension": extension, "filename": filename, "urlpath": urlpath} # fmt: skip -@curried.curry def to_values(data: DataType) -> ToValuesReturnType: """Replace a DataFrame by a data model with values.""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) # Maybe the type could be further clarified here that it is @@ -238,9 +331,7 @@ def to_values(data: DataType) -> ToValuesReturnType: def check_data_type(data: DataType) -> None: - if not isinstance(data, (dict, pd.DataFrame, DataFrameLike)) and not any( - hasattr(data, attr) for attr in ["__geo_interface__"] - ): + if not is_data_type(data): raise TypeError( "Expected dict, DataFrame or a __geo_interface__ attribute, got: {}".format( type(data) @@ -258,7 +349,7 @@ def _compute_data_hash(data_str: str) -> str: def _data_to_json_string(data: DataType) -> str: """Return a JSON string representation of the input data""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) # Maybe the type could be further clarified here that it is @@ -284,10 +375,11 @@ def _data_to_json_string(data: DataType) -> str: def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str: """return a CSV string representation of the input data""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): raise NotImplementedError( - "to_csv does not work with data that " - "contains the __geo_interface__ attribute" + f"to_csv does not yet work with data that " + f"is of type {type(SupportsGeoInterface).__name__!r}.\n" + f"See https://github.com/vega/altair/issues/3441" ) elif isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) @@ -311,35 +403,6 @@ def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str: ) -def pipe(data, *funcs): - """ - Pipe a value through a sequence of functions - - Deprecated: use toolz.curried.pipe() instead. - """ - warnings.warn( - "alt.pipe() is deprecated, and will be removed in a future release. " - "Use toolz.curried.pipe() instead.", - AltairDeprecationWarning, - stacklevel=1, - ) - return curried.pipe(data, *funcs) - - -def curry(*args, **kwargs): - """Curry a callable function - - Deprecated: use toolz.curried.curry() instead. - """ - warnings.warn( - "alt.curry() is deprecated, and will be removed in a future release. " - "Use toolz.curried.curry() instead.", - AltairDeprecationWarning, - stacklevel=1, - ) - return curried.curry(*args, **kwargs) - - def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> "pyarrow.lib.Table": """Convert a DataFrame Interchange Protocol compatible object to an Arrow Table""" import pyarrow as pa diff --git a/altair/utils/plugin_registry.py b/altair/utils/plugin_registry.py index f6281ed14..b1cce92ec 100644 --- a/altair/utils/plugin_registry.py +++ b/altair/utils/plugin_registry.py @@ -1,10 +1,8 @@ -from typing import Any, Dict, List, Optional, Generic, TypeVar, cast +from functools import partial +from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, cast, Callable from types import TracebackType - from importlib.metadata import entry_points -from toolz import curry - PluginType = TypeVar("PluginType") @@ -71,7 +69,7 @@ class PluginRegistry(Generic[PluginType]): # in the registry rather than passed to the plugins _global_settings = {} # type: Dict[str, Any] - def __init__(self, entry_point_group: str = "", plugin_type: type = object): + def __init__(self, entry_point_group: str = "", plugin_type: type = Callable): # type: ignore[assignment] """Create a PluginRegistry for a named entry point group. Parameters @@ -90,7 +88,9 @@ def __init__(self, entry_point_group: str = "", plugin_type: type = object): self._options = {} # type: Dict[str, Any] self._global_settings = self.__class__._global_settings.copy() # type: dict - def register(self, name: str, value: Optional[PluginType]) -> Optional[PluginType]: + def register( + self, name: str, value: Union[Optional[PluginType], Any] + ) -> Optional[PluginType]: """Register a plugin by name and value. This method is used for explicit registration of a plugin and shouldn't be @@ -199,10 +199,15 @@ def options(self) -> Dict[str, Any]: """Return the current options dictionary""" return self._options - def get(self) -> Optional[PluginType]: + def get(self) -> Optional[Union[PluginType, Callable[..., Any]]]: """Return the currently active plugin.""" if self._options: - return curry(self._active, **self._options) + if func := self._active: + # NOTE: Fully do not understand this one + # error: Argument 1 to "partial" has incompatible type "PluginType"; expected "Callable[..., Never]" + return partial(func, **self._options) # type: ignore[arg-type] + else: + raise TypeError("Unclear what this meant by passing to curry.") else: return self._active diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index fbeda0fee..5b7a90a3d 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -1,10 +1,7 @@ -from toolz import curried from ..utils.core import sanitize_dataframe from ..utils.data import ( MaxRowsError, - curry, limit_rows, - pipe, sample, to_csv, to_json, @@ -14,13 +11,30 @@ from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry from ..utils.data import DataType, ToValuesReturnType from ..utils.plugin_registry import PluginEnabler +from typing import Optional, Union, overload, Callable -@curried.curry +@overload def default_data_transformer( - data: DataType, max_rows: int = 5000 -) -> ToValuesReturnType: - return curried.pipe(data, limit_rows(max_rows=max_rows), to_values) + data: None = ..., max_rows: int = ... +) -> Callable[[DataType], ToValuesReturnType]: ... +@overload +def default_data_transformer( + data: DataType, max_rows: int = ... +) -> ToValuesReturnType: ... +def default_data_transformer( + data: Optional[DataType] = None, max_rows: int = 5000 +) -> Union[Callable[[DataType], ToValuesReturnType], ToValuesReturnType]: + if data is None: + + def pipe(data: DataType, /) -> ToValuesReturnType: + data = limit_rows(data, max_rows=max_rows) + return to_values(data) + + return pipe + + else: + return to_values(limit_rows(data, max_rows=max_rows)) class DataTransformerRegistry(_DataTransformerRegistry): @@ -36,11 +50,9 @@ def disable_max_rows(self) -> PluginEnabler: __all__ = ( "DataTransformerRegistry", "MaxRowsError", - "curry", "sanitize_dataframe", "default_data_transformer", "limit_rows", - "pipe", "sample", "to_csv", "to_json", diff --git a/altair/vegalite/v5/__init__.py b/altair/vegalite/v5/__init__.py index 8c75d5cc2..17c61a6e5 100644 --- a/altair/vegalite/v5/__init__.py +++ b/altair/vegalite/v5/__init__.py @@ -9,8 +9,6 @@ from .data import ( MaxRowsError, - pipe, - curry, limit_rows, sample, to_json, diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 37a13c2ae..6425c77fe 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -4,8 +4,6 @@ import io import json import jsonschema -import pandas as pd -from toolz.curried import pipe as _pipe import itertools import sys import pathlib @@ -29,7 +27,7 @@ compile_with_vegafusion as _compile_with_vegafusion, ) from ...utils.core import DataFrameLike -from ...utils.data import DataType +from ...utils.data import DataType, is_data_type as _is_data_type from ...utils.deprecation import AltairDeprecationWarning if sys.version_info >= (3, 13): @@ -114,16 +112,14 @@ def _prepare_data(data, context=None): return data # convert dataframes or objects with __geo_interface__ to dict - elif isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"): - data = _pipe(data, data_transformers.get()) + elif not isinstance(data, dict) and _is_data_type(data): + if func := data_transformers.get(): + data = func(data) # convert string input to a URLData elif isinstance(data, str): data = core.UrlData(data) - elif isinstance(data, DataFrameLike): - data = _pipe(data, data_transformers.get()) - # consolidate inline data to top-level datasets if context is not None and data_transformers.consolidate_datasets: data = _consolidate_data(data, context) diff --git a/altair/vegalite/v5/data.py b/altair/vegalite/v5/data.py index 1e47db526..841c1d8bb 100644 --- a/altair/vegalite/v5/data.py +++ b/altair/vegalite/v5/data.py @@ -1,9 +1,7 @@ from ..data import ( MaxRowsError, - curry, default_data_transformer, limit_rows, - pipe, sample, to_csv, to_json, @@ -34,10 +32,8 @@ __all__ = ( "MaxRowsError", - "curry", "default_data_transformer", "limit_rows", - "pipe", "sample", "to_csv", "to_json", diff --git a/pyproject.toml b/pyproject.toml index 5a25a2dba..ba6de061a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ dependencies = [ "numpy<2.0.0", # If you update the minimum required pandas version, also update it in build.yml "pandas>=0.25", - "toolz", "packaging" ] description = "Vega-Altair: A declarative statistical visualization library for Python." diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index 0d746e79d..889e919be 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -1,9 +1,7 @@ import os - +from typing import Any, Callable import pytest import pandas as pd -from toolz import pipe - from altair.utils.data import ( limit_rows, MaxRowsError, @@ -14,6 +12,14 @@ ) +def _pipe(data: Any, *funcs: Callable[..., Any]) -> Any: + # Redefined to maintain existing tests + # Originally part of `toolz` dependency + for func in funcs: + data = func(data) + return data + + def _create_dataframe(N): data = pd.DataFrame({"x": range(N), "y": range(N)}) return data @@ -30,9 +36,9 @@ def test_limit_rows(): result = limit_rows(data, max_rows=20) assert data is result with pytest.raises(MaxRowsError): - pipe(data, limit_rows(max_rows=5)) + _pipe(data, limit_rows(max_rows=5)) data = _create_data_with_values(10) - result = pipe(data, limit_rows(max_rows=20)) + result = _pipe(data, limit_rows(max_rows=20)) assert data is result with pytest.raises(MaxRowsError): limit_rows(data, max_rows=5) @@ -41,7 +47,7 @@ def test_limit_rows(): def test_sample(): """Test the sample data transformer.""" data = _create_dataframe(20) - result = pipe(data, sample(n=10)) + result = _pipe(data, sample(n=10)) assert len(result) == 10 assert isinstance(result, pd.DataFrame) data = _create_data_with_values(20) @@ -50,7 +56,7 @@ def test_sample(): assert "values" in result assert len(result["values"]) == 10 data = _create_dataframe(20) - result = pipe(data, sample(frac=0.5)) + result = _pipe(data, sample(frac=0.5)) assert len(result) == 10 assert isinstance(result, pd.DataFrame) data = _create_data_with_values(20) @@ -63,7 +69,7 @@ def test_sample(): def test_to_values(): """Test the to_values data transformer.""" data = _create_dataframe(10) - result = pipe(data, to_values) + result = _pipe(data, to_values) assert result == {"values": data.to_dict(orient="records")} @@ -71,7 +77,7 @@ def test_type_error(): """Ensure that TypeError is raised for types other than dict/DataFrame.""" for f in (sample, limit_rows, to_values): with pytest.raises(TypeError): - pipe(0, f) + _pipe(0, f) def test_dataframe_to_json(): @@ -81,8 +87,8 @@ def test_dataframe_to_json(): """ data = _create_dataframe(10) try: - result1 = pipe(data, to_json) - result2 = pipe(data, to_json) + result1 = _pipe(data, to_json) + result2 = _pipe(data, to_json) filename = result1["url"] output = pd.read_json(filename) finally: @@ -99,8 +105,8 @@ def test_dict_to_json(): """ data = _create_data_with_values(10) try: - result1 = pipe(data, to_json) - result2 = pipe(data, to_json) + result1 = _pipe(data, to_json) + result2 = _pipe(data, to_json) filename = result1["url"] output = pd.read_json(filename).to_dict(orient="records") finally: @@ -117,8 +123,8 @@ def test_dataframe_to_csv(): """ data = _create_dataframe(10) try: - result1 = pipe(data, to_csv) - result2 = pipe(data, to_csv) + result1 = _pipe(data, to_csv) + result2 = _pipe(data, to_csv) filename = result1["url"] output = pd.read_csv(filename) finally: @@ -135,8 +141,8 @@ def test_dict_to_csv(): """ data = _create_data_with_values(10) try: - result1 = pipe(data, to_csv) - result2 = pipe(data, to_csv) + result1 = _pipe(data, to_csv) + result2 = _pipe(data, to_csv) filename = result1["url"] output = pd.read_csv(filename).to_dict(orient="records") finally: