From c3fbf4a64fd1c48a6f11e22c38fd021acf8f8e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Mon, 28 Aug 2023 22:11:44 +0200 Subject: [PATCH 01/13] Add simple polars support --- hvplot/polars.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 hvplot/polars.py diff --git a/hvplot/polars.py b/hvplot/polars.py new file mode 100644 index 000000000..4352b8e5a --- /dev/null +++ b/hvplot/polars.py @@ -0,0 +1,57 @@ +import itertools + +from hvplot import hvPlotTabular, post_patch +from hvplot.converter import HoloViewsConverter + +try: + import polars as pl +except: + raise ImportError( + "Could not patch plotting API onto Polars. Polars could not be imported." + ) + + +@pl.api.register_dataframe_namespace("hvplot") +@pl.api.register_series_namespace("hvplot") +@pl.api.register_lazyframe_namespace("hvplot") +class hvPlotTabularPolars(hvPlotTabular): + def _get_converter(self, x=None, y=None, kind=None, **kwds): + params = dict(self._metadata, **kwds) + x = x or params.pop("x", None) + y = y or params.pop("y", None) + kind = kind or params.pop("kind", None) + + # Find columns which should be converted for LazyDataFrame and DataFrame + if isinstance(self._data, (pl.LazyFrame, pl.DataFrame)): + possible_columns = [ + [v] if isinstance(v, str) else v + for v in params.values() + if isinstance(v, (str, list)) + ] + columns = ( + set(self._data.columns) & set(itertools.chain(*possible_columns)) + ) or {self._data.columns[0]} + columns |= {x, y} + columns.discard(None) + + if isinstance(self._data, pl.DataFrame): + data = self._data.select(columns).to_pandas() + elif isinstance(self._data, pl.Series): + data = self._data.to_pandas() + elif isinstance(self._data, pl.LazyFrame): + data = self._data.select(columns).collect().to_pandas() + else: + raise ValueError( + "Only Polars DataFrame, Series, and LazyFrame are supported" + ) + + return HoloViewsConverter(data, x, y, kind=kind, **params) + + +def patch(name="hvplot", extension="bokeh", logo=False): + import hvplot.pandas # noqa + + post_patch(extension, logo) + + +patch() From da9d91e4d8feac4d95ad7f9548d1445079cad18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Tue, 29 Aug 2023 09:14:43 +0200 Subject: [PATCH 02/13] Support hover_cols="all" --- hvplot/polars.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/hvplot/polars.py b/hvplot/polars.py index 4352b8e5a..dd5ac5dce 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -23,16 +23,19 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): # Find columns which should be converted for LazyDataFrame and DataFrame if isinstance(self._data, (pl.LazyFrame, pl.DataFrame)): - possible_columns = [ - [v] if isinstance(v, str) else v - for v in params.values() - if isinstance(v, (str, list)) - ] - columns = ( - set(self._data.columns) & set(itertools.chain(*possible_columns)) - ) or {self._data.columns[0]} - columns |= {x, y} - columns.discard(None) + if params.get("hover_cols") == "all": + columns = list(self._data.columns) + else: + possible_columns = [ + [v] if isinstance(v, str) else v + for v in params.values() + if isinstance(v, (str, list)) + ] + columns = ( + set(self._data.columns) & set(itertools.chain(*possible_columns)) + ) or {self._data.columns[0]} + columns |= {x, y} + columns.discard(None) if isinstance(self._data, pl.DataFrame): data = self._data.select(columns).to_pandas() From a9cd914a0ccd67e4e0603e7b62034a51b6a88194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 10:08:52 +0200 Subject: [PATCH 03/13] Handle list_like x and y --- hvplot/polars.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hvplot/polars.py b/hvplot/polars.py index dd5ac5dce..30a0c0364 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -2,6 +2,7 @@ from hvplot import hvPlotTabular, post_patch from hvplot.converter import HoloViewsConverter +from hvplot.util import is_list_like try: import polars as pl @@ -34,7 +35,9 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): columns = ( set(self._data.columns) & set(itertools.chain(*possible_columns)) ) or {self._data.columns[0]} - columns |= {x, y} + xs = x if is_list_like(x) else (x,) + ys = y if is_list_like(y) else (y,) + columns |= {*xs, *ys} columns.discard(None) if isinstance(self._data, pl.DataFrame): From e38b4da0217d8bff381cc045345750c9cbef1dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 10:12:11 +0200 Subject: [PATCH 04/13] Add tests to polars --- hvplot/tests/plotting/testcore.py | 55 ++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/hvplot/tests/plotting/testcore.py b/hvplot/tests/plotting/testcore.py index a2ed1ea3a..67ee561c0 100644 --- a/hvplot/tests/plotting/testcore.py +++ b/hvplot/tests/plotting/testcore.py @@ -4,18 +4,63 @@ import pytest -@pytest.mark.parametrize("y", ( +try: + import polars as pl + import hvplot.polars # noqa +except ImportError: + pl = None + + +FRAME_IGNORE_TYPES = {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"} +SERIES_IGNORE_TYPES = {*FRAME_IGNORE_TYPES, "points", "polygons", "ohlc", "paths"} + +y_combinations = pytest.mark.parametrize("y", ( ["A", "B", "C", "D"], ("A", "B", "C", "D"), {"A", "B", "C", "D"}, np.array(["A", "B", "C", "D"]), pd.Index(["A", "B", "C", "D"]), pd.Series(["A", "B", "C", "D"]), - )) -def test_diffent_input_types(y): + ), + ids=lambda x: type(x).__name__ +) + +@y_combinations +def test_diffent_input_types_pandas(y): df = pd._testing.makeDataFrame() types = {t for t in dir(df.hvplot) if not t.startswith("_")} - ignore_types = {'bivariate', 'heatmap', 'hexbin', 'labels', 'vectorfield'} - for t in types - ignore_types: + for t in types - FRAME_IGNORE_TYPES: + df.hvplot(y=y, kind=t) + + +def test_series_pandas(): + ser = pd.Series(np.random.rand(10), name="A") + assert isinstance(ser, pd.Series) + + types = {t for t in dir(ser.hvplot) if not t.startswith("_")} + for t in types - SERIES_IGNORE_TYPES: + ser.hvplot(kind=t) + + + +@pytest.mark.skipif(pl is None, reason="polars not installed") +@pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame)) +@y_combinations +def test_diffent_input_types_polars(y, cast): + df = cast(pd._testing.makeDataFrame()) + assert isinstance(df, cast) + + types = {t for t in dir(df.hvplot) if not t.startswith("_")} + for t in types - FRAME_IGNORE_TYPES: df.hvplot(y=y, kind=t) + + +@pytest.mark.skipif(pl is None, reason="polars not installed") +def test_series_polars(): + ser = pl.Series(values=np.random.rand(10), name="A") + assert isinstance(ser, pl.Series) + + types = {t for t in dir(ser.hvplot) if not t.startswith("_")} + for t in types - SERIES_IGNORE_TYPES: + ser.hvplot(kind=t) From 638b509d1ebbbadbcf560b724e9a50d729948346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 10:34:37 +0200 Subject: [PATCH 05/13] Add small documentation --- doc/index.md | 14 ++++++++++++++ examples/user_guide/Introduction.ipynb | 1 + 2 files changed, 15 insertions(+) diff --git a/doc/index.md b/doc/index.md index 5860ba937..02acfe187 100644 --- a/doc/index.md +++ b/doc/index.md @@ -86,6 +86,20 @@ alt: Works with Dask align: center --- ::: +:::{tab-item} Polars +```python +import polars +import hvplot.polars + +df_polars = polars.from_pandas(df) +df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species') +``` +```{image} ./_static/home/dask.gif +--- +alt: Works with Polars +align: center +--- +::: :::{tab-item} GeoPandas ```python diff --git a/examples/user_guide/Introduction.ipynb b/examples/user_guide/Introduction.ipynb index 825e4dc61..9a7a01e2b 100644 --- a/examples/user_guide/Introduction.ipynb +++ b/examples/user_guide/Introduction.ipynb @@ -8,6 +8,7 @@ "\n", "* [Pandas](https://pandas.pydata.org): DataFrame, Series (columnar/tabular data)\n", "* [Rapids cuDF](https://docs.rapids.ai/api/cudf/stable/): GPU DataFrame, Series (columnar/tabular data)\n", + "* [Polars](https://www.pola.rs/): Polars is a fast DataFrame library/in-memory query engine (columnar/tabular data)\n", "* [Dask](https://www.dask.org): DataFrame, Series (distributed/out of core arrays and columnar data)\n", "* [XArray](https://xarray.pydata.org): Dataset, DataArray (labelled multidimensional arrays)\n", "* [Streamz](https://streamz.readthedocs.io): DataFrame(s), Series(s) (streaming columnar data)\n", From 57e45ca70b1da0c714f00b9933a832f3ef48fdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 10:43:22 +0200 Subject: [PATCH 06/13] Add polars to test packages and handle if polars not installed --- hvplot/tests/plotting/testcore.py | 11 ++++++++--- setup.py | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hvplot/tests/plotting/testcore.py b/hvplot/tests/plotting/testcore.py index 67ee561c0..5aae05afd 100644 --- a/hvplot/tests/plotting/testcore.py +++ b/hvplot/tests/plotting/testcore.py @@ -7,8 +7,13 @@ try: import polars as pl import hvplot.polars # noqa + skip_polar = False except ImportError: - pl = None + class pl: + DataFrame = None + LazyFrame = None + Series = None + skip_polar = True FRAME_IGNORE_TYPES = {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"} @@ -44,7 +49,7 @@ def test_series_pandas(): -@pytest.mark.skipif(pl is None, reason="polars not installed") +@pytest.mark.skipif(skip_polar, reason="polars not installed") @pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame)) @y_combinations def test_diffent_input_types_polars(y, cast): @@ -56,7 +61,7 @@ def test_diffent_input_types_polars(y, cast): df.hvplot(y=y, kind=t) -@pytest.mark.skipif(pl is None, reason="polars not installed") +@pytest.mark.skipif(skip_polar, reason="polars not installed") def test_series_polars(): ser = pl.Series(values=np.random.rand(10), name="A") assert isinstance(ser, pl.Series) diff --git a/setup.py b/setup.py index 0bc3a79cc..c5e627457 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def get_setup_version(reponame): 'pooch', 'scipy', 'ipywidgets', + 'polars', ] # Dependencies required to run the notebooks From e80aaad69733aff86baab9c7b7a7ee76b7883da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 11:13:32 +0200 Subject: [PATCH 07/13] Move polars after geopandas --- doc/index.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/index.md b/doc/index.md index 02acfe187..e5f810e81 100644 --- a/doc/index.md +++ b/doc/index.md @@ -86,20 +86,6 @@ alt: Works with Dask align: center --- ::: -:::{tab-item} Polars -```python -import polars -import hvplot.polars - -df_polars = polars.from_pandas(df) -df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species') -``` -```{image} ./_static/home/dask.gif ---- -alt: Works with Polars -align: center ---- -::: :::{tab-item} GeoPandas ```python @@ -115,6 +101,20 @@ alt: Works with GeoPandas align: center --- ::: +:::{tab-item} Polars +```python +import polars +import hvplot.polars + +df_polars = polars.from_pandas(df) +df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species') +``` +```{image} ./_static/home/dask.gif +--- +alt: Works with Polars +align: center +--- +::: :::{tab-item} Intake ```python From 40f8b9ca7465023ab2260c481a11ae3a4132874a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 7 Sep 2023 11:33:36 +0200 Subject: [PATCH 08/13] Parametrize the tests --- hvplot/tests/plotting/testcore.py | 45 ++++++++++++++----------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/hvplot/tests/plotting/testcore.py b/hvplot/tests/plotting/testcore.py index 5aae05afd..a65afd600 100644 --- a/hvplot/tests/plotting/testcore.py +++ b/hvplot/tests/plotting/testcore.py @@ -1,9 +1,10 @@ import numpy as np import pandas as pd import hvplot.pandas # noqa - import pytest +from hvplot import hvPlotTabular + try: import polars as pl import hvplot.polars # noqa @@ -16,8 +17,11 @@ class pl: skip_polar = True -FRAME_IGNORE_TYPES = {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"} -SERIES_IGNORE_TYPES = {*FRAME_IGNORE_TYPES, "points", "polygons", "ohlc", "paths"} +TYPES = {t for t in dir(hvPlotTabular) if not t.startswith("_")} +FRAME_TYPES = TYPES - {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"} +SERIES_TYPES = FRAME_TYPES - {"points", "polygons", "ohlc", "paths"} +frame_kinds = pytest.mark.parametrize("kind", FRAME_TYPES) +series_kinds = pytest.mark.parametrize("kind", SERIES_TYPES) y_combinations = pytest.mark.parametrize("y", ( ["A", "B", "C", "D"], @@ -30,42 +34,33 @@ class pl: ids=lambda x: type(x).__name__ ) + +@frame_kinds @y_combinations -def test_diffent_input_types_pandas(y): +def test_dataframe_pandas(kind, y): df = pd._testing.makeDataFrame() - types = {t for t in dir(df.hvplot) if not t.startswith("_")} - - for t in types - FRAME_IGNORE_TYPES: - df.hvplot(y=y, kind=t) + df.hvplot(y=y, kind=kind) -def test_series_pandas(): +@series_kinds +def test_series_pandas(kind): ser = pd.Series(np.random.rand(10), name="A") - assert isinstance(ser, pd.Series) - - types = {t for t in dir(ser.hvplot) if not t.startswith("_")} - for t in types - SERIES_IGNORE_TYPES: - ser.hvplot(kind=t) - + ser.hvplot(kind=kind) @pytest.mark.skipif(skip_polar, reason="polars not installed") @pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame)) +@frame_kinds @y_combinations -def test_diffent_input_types_polars(y, cast): +def test_dataframe_polars(kind, y, cast): df = cast(pd._testing.makeDataFrame()) assert isinstance(df, cast) - - types = {t for t in dir(df.hvplot) if not t.startswith("_")} - for t in types - FRAME_IGNORE_TYPES: - df.hvplot(y=y, kind=t) + df.hvplot(y=y, kind=kind) @pytest.mark.skipif(skip_polar, reason="polars not installed") -def test_series_polars(): +@series_kinds +def test_series_polars(kind): ser = pl.Series(values=np.random.rand(10), name="A") assert isinstance(ser, pl.Series) - - types = {t for t in dir(ser.hvplot) if not t.startswith("_")} - for t in types - SERIES_IGNORE_TYPES: - ser.hvplot(kind=t) + ser.hvplot(kind=kind) From 384ee83a03ff1d5a1e21d93efdbf9ad416eea759 Mon Sep 17 00:00:00 2001 From: maximlt Date: Thu, 28 Sep 2023 23:52:57 +0200 Subject: [PATCH 09/13] add patch tests --- hvplot/tests/testpatch.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/hvplot/tests/testpatch.py b/hvplot/tests/testpatch.py index 485e27e96..8e4912813 100644 --- a/hvplot/tests/testpatch.py +++ b/hvplot/tests/testpatch.py @@ -100,3 +100,28 @@ def test_streamz_seriess_patched(self): from streamz.dataframe import Random random_df = Random() self.assertIsInstance(random_df.groupby('x').sum().y.hvplot, hvPlotTabular) + + +class TestPatchPolars(TestCase): + + def setUp(self): + try: + import polars as pl # noqa + except: + raise SkipTest('Polars not available') + import hvplot.polars # noqa + + def test_polars_series_patched(self): + import polars as pl + pseries = pl.Series([0, 1, 2]) + self.assertIsInstance(pseries.hvplot, hvPlotTabular) + + def test_polars_dataframe_patched(self): + import polars as pl + pdf = pl.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + self.assertIsInstance(pdf.hvplot, hvPlotTabular) + + def test_polars_lazyframe_patched(self): + import polars as pl + pldf = pl.LazyFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + self.assertIsInstance(pldf.hvplot, hvPlotTabular) From 5562c4e0c65cb204a97fa0f267924dd810f82853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Fri, 29 Sep 2023 08:54:21 +0200 Subject: [PATCH 10/13] Update hvplot/polars.py Co-authored-by: Maxime Liquet <35924738+maximlt@users.noreply.github.com> --- hvplot/polars.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hvplot/polars.py b/hvplot/polars.py index 30a0c0364..f3d3216d2 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -1,3 +1,4 @@ +"""Adds the `.hvplot` method to pl.DataFrame, pl.LazyFrame and pl.Series""" import itertools from hvplot import hvPlotTabular, post_patch From 1d6bee6ea6d44dc21fb29b1a10b044bc1a7e2134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Fri, 29 Sep 2023 08:55:59 +0200 Subject: [PATCH 11/13] Remove hvplot.pandas import --- hvplot/polars.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hvplot/polars.py b/hvplot/polars.py index f3d3216d2..44f936761 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -56,8 +56,6 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): def patch(name="hvplot", extension="bokeh", logo=False): - import hvplot.pandas # noqa - post_patch(extension, logo) From 6c2e9fa00a302d8162d8b387fa0019967f1cd4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Mon, 2 Oct 2023 11:58:38 +0200 Subject: [PATCH 12/13] Apply suggestions from code review Co-authored-by: Maxime Liquet <35924738+maximlt@users.noreply.github.com> --- hvplot/polars.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hvplot/polars.py b/hvplot/polars.py index 44f936761..6b1320c37 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -13,9 +13,6 @@ ) -@pl.api.register_dataframe_namespace("hvplot") -@pl.api.register_series_namespace("hvplot") -@pl.api.register_lazyframe_namespace("hvplot") class hvPlotTabularPolars(hvPlotTabular): def _get_converter(self, x=None, y=None, kind=None, **kwds): params = dict(self._metadata, **kwds) @@ -56,6 +53,10 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): def patch(name="hvplot", extension="bokeh", logo=False): + pl.api.register_dataframe_namespace(name)(hvPlotTabularPolars) + pl.api.register_series_namespace(name)(hvPlotTabularPolars) + pl.api.register_lazyframe_namespace(name)(hvPlotTabularPolars) + post_patch(extension, logo) From 16df551779b81a2a8a27848172bfbb671302f8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Mon, 2 Oct 2023 12:03:56 +0200 Subject: [PATCH 13/13] Move import into patch --- hvplot/polars.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/hvplot/polars.py b/hvplot/polars.py index 6b1320c37..8ec9d4ff7 100644 --- a/hvplot/polars.py +++ b/hvplot/polars.py @@ -5,16 +5,11 @@ from hvplot.converter import HoloViewsConverter from hvplot.util import is_list_like -try: - import polars as pl -except: - raise ImportError( - "Could not patch plotting API onto Polars. Polars could not be imported." - ) - class hvPlotTabularPolars(hvPlotTabular): def _get_converter(self, x=None, y=None, kind=None, **kwds): + import polars as pl + params = dict(self._metadata, **kwds) x = x or params.pop("x", None) y = y or params.pop("y", None) @@ -53,6 +48,12 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): def patch(name="hvplot", extension="bokeh", logo=False): + try: + import polars as pl + except: + raise ImportError( + "Could not patch plotting API onto Polars. Polars could not be imported." + ) pl.api.register_dataframe_namespace(name)(hvPlotTabularPolars) pl.api.register_series_namespace(name)(hvPlotTabularPolars) pl.api.register_lazyframe_namespace(name)(hvPlotTabularPolars)