Skip to content

Commit

Permalink
Impl for lazy
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller committed Oct 12, 2023
1 parent 691e002 commit 87bf00d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 36 deletions.
32 changes: 3 additions & 29 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3966,7 +3966,7 @@ def filter(
Provide multiple filters using alternative syntax:
>>> df.filter(pl.col("foo") == 1, pl.col("ham") == "a")
shape: (2, 3)
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
Expand All @@ -3977,7 +3977,7 @@ def filter(
Use column-supplied filters:
>>> df.filter(foo=1, ham="a")
shape: (2, 3)
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
Expand All @@ -3987,34 +3987,8 @@ def filter(
└─────┴─────┴─────┘
"""
has_predicates = len(predicates) > 0
has_constraints = len(constraints) > 0
if has_predicates:
# convert numpy boolean arrays to Series if supplied
predicates = [ # type: ignore[assignment]
pl.Series(predicate)
if _check_for_numpy(predicate) and isinstance(predicate, np.ndarray)
else predicate
for predicate in predicates
]

# if multiple predicates are supplied, perform logical AND
if len(predicates) > 1:
predicates = F.all_horizontal(predicates) # type: ignore[assignment, arg-type]
else:
predicates = predicates[0] # type: ignore[assignment]
if has_constraints:
# basic column filters provided
constraints = F.all_horizontal(col(k) == v for k, v in constraints.items()) # type: ignore[assignment]

if has_predicates and has_constraints:
predicates = predicates & constraints # type: ignore[operator]
elif has_constraints:
# only constraints were supplied, assign as predicates
predicates = constraints # type: ignore[assignment]

return (
self.lazy().filter(predicates).collect(_eager=True) # type: ignore[arg-type]
self.lazy().filter(*predicates, **constraints).collect(_eager=True) # type: ignore[arg-type]
)

@overload
Expand Down
54 changes: 47 additions & 7 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
Utf8,
py_type_to_dtype,
)
from polars.dependencies import dataframe_api_compat, subprocess
from polars.dependencies import _check_for_numpy, dataframe_api_compat, subprocess
from polars.dependencies import numpy as np
from polars.io._utils import _is_local_file, _is_supported_cloud
from polars.io.csv._utils import _check_arg_is_1byte
from polars.io.ipc.anonymous_scan import _scan_ipc_fsspec
Expand Down Expand Up @@ -2506,16 +2507,18 @@ def clone(self) -> Self:
"""
return self._from_pyldf(self._ldf.clone())

def filter(self, predicate: IntoExpr) -> Self:
def filter(self, *predicates: IntoExpr, **constraints: dict[str, Any]) -> Self:
"""
Filter the rows in the LazyFrame based on a predicate expression.
The original order of the remaining rows is preserved.
Parameters
----------
predicate
predicates
Expression(s) that evaluate to a boolean Series. Multiple predicates are combined with a logical AND.
constraints
Column filters. Use ``name=value`` to keep only the rows where column ``name`` equals the supplied value.
Examples
--------
Expand Down Expand Up @@ -2552,6 +2555,18 @@ def filter(self, predicate: IntoExpr) -> Self:
│ 1 ┆ 6 ┆ a │
└─────┴─────┴─────┘
Provide multiple filters using alternative syntax:
>>> lf.filter(pl.col("foo") == 1, pl.col("ham") == "a").collect()
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1 ┆ 6 ┆ a │
└─────┴─────┴─────┘
Filter on an OR condition:
>>> lf.filter((pl.col("foo") == 1) | (pl.col("ham") == "c")).collect()
Expand All @@ -2566,11 +2581,36 @@ def filter(self, predicate: IntoExpr) -> Self:
└─────┴─────┴─────┘
"""
if isinstance(predicate, list):
predicate = pl.Series(predicate)
has_predicates = len(predicates) > 0
has_constraints = len(constraints) > 0

if not (has_predicates or has_constraints):
raise ValueError("No predicates or constraints provided")

if has_predicates:
predicates = [ # type: ignore[assignment]
pl.Series(predicate)
if isinstance(predicate, list)
or (_check_for_numpy(predicate) and isinstance(predicate, np.ndarray))
else predicate
for predicate in predicates
]
if len(predicates) > 1:
# multiple predicates supplied, AND them together
predicates = F.all_horizontal(*predicates) # type: ignore[assignment]
else:
predicates = predicates[0] # type: ignore[assignment]

if has_constraints:
constraints = F.all_horizontal(F.col(k) == v for k, v in constraints.items()) # type: ignore[assignment]

if has_predicates and has_constraints:
predicates = predicates & constraints # type: ignore[operator]
elif has_constraints:
predicates = constraints # type: ignore[assignment]

predicate = parse_as_expression(predicate)
return self._from_pyldf(self._ldf.filter(predicate))
predicates = parse_as_expression(predicates) # type: ignore[assignment, arg-type]
return self._from_pyldf(self._ldf.filter(predicates))

def select(
self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
Expand Down

0 comments on commit 87bf00d

Please sign in to comment.