From 875453b11e005b4bea2338497d3b7e9c6493a93d Mon Sep 17 00:00:00 2001 From: petrosbar Date: Sun, 8 Oct 2023 23:09:33 +0300 Subject: [PATCH 1/3] feat(rust): implement is_between on the rust side --- crates/polars-lazy/Cargo.toml | 1 + crates/polars-ops/Cargo.toml | 1 + .../polars-ops/src/series/ops/is_between.rs | 34 +++++++++++++++++++ crates/polars-ops/src/series/ops/mod.rs | 4 +++ crates/polars-plan/Cargo.toml | 1 + .../src/dsl/function_expr/boolean.rs | 16 +++++++++ crates/polars-plan/src/dsl/mod.rs | 11 ++++++ crates/polars/Cargo.toml | 2 ++ crates/polars/src/lib.rs | 1 + py-polars/Cargo.toml | 1 + .../source/reference/expressions/boolean.rst | 1 + .../source/reference/series/descriptive.rst | 1 + py-polars/polars/expr/expr.py | 25 ++++---------- py-polars/polars/series/series.py | 2 +- py-polars/src/conversion.rs | 17 ++++++++++ py-polars/src/expr/general.rs | 7 ++++ 16 files changed, 106 insertions(+), 19 deletions(-) create mode 100644 crates/polars-ops/src/series/ops/is_between.rs diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 8dfd8ea56b77..04d03ff42a42 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -92,6 +92,7 @@ repeat_by = ["polars-plan/repeat_by"] round_series = ["polars-plan/round_series", "polars-ops/round_series"] is_first_distinct = ["polars-plan/is_first_distinct"] is_last_distinct = ["polars-plan/is_last_distinct"] +is_between = ["polars-plan/is_between"] is_unique = ["polars-plan/is_unique"] cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"] asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"] diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 5a49b87c3f84..cb5e247a5087 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -69,6 +69,7 @@ is_first_distinct = [] is_last_distinct = [] is_unique = [] unique_counts = [] +is_between = [] approx_unique = [] fused = [] cutqcut = ["dtype-categorical", "dtype-struct"] diff --git a/crates/polars-ops/src/series/ops/is_between.rs b/crates/polars-ops/src/series/ops/is_between.rs new file mode 100644 index 000000000000..053493d552f6 --- /dev/null +++ b/crates/polars-ops/src/series/ops/is_between.rs @@ -0,0 +1,34 @@ +use std::ops::BitAnd; + +use polars_core::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum ClosedInterval { + #[default] + Both, + Left, + Right, + None, +} + +pub fn is_between( + s: &Series, + lower: &Series, + upper: &Series, + closed: ClosedInterval, +) -> PolarsResult { + let left_cmp_op = match closed { + ClosedInterval::None | ClosedInterval::Right => Series::gt, + ClosedInterval::Both | ClosedInterval::Left => Series::gt_eq, + }; + let right_cmp_op = match closed { + ClosedInterval::None | ClosedInterval::Left => Series::lt, + ClosedInterval::Both | ClosedInterval::Right => Series::lt_eq, + }; + let left = left_cmp_op(s, lower)?; + let right = right_cmp_op(s, upper)?; + Ok(left.bitand(right)) +} diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index bb82b5bbeb40..34c9aee8a23e 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -20,6 +20,8 @@ mod floor_divide; mod fused; mod horizontal; mod index; +#[cfg(feature = "is_between")] +mod is_between; #[cfg(feature = "is_first_distinct")] mod is_first_distinct; #[cfg(feature = "is_in")] @@ -74,6 +76,8 @@ pub use floor_divide::*; pub use fused::*; pub use horizontal::*; pub use index::*; +#[cfg(feature = "is_between")] +pub use is_between::*; #[cfg(feature = "is_first_distinct")] pub use is_first_distinct::*; #[cfg(feature = "is_in")] diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index e736d7c6665e..1b546a776656 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -97,6 +97,7 @@ round_series = ["polars-ops/round_series"] is_first_distinct = ["polars-core/is_first_distinct", "polars-ops/is_first_distinct"] is_last_distinct = ["polars-core/is_last_distinct", "polars-ops/is_last_distinct"] is_unique = ["polars-ops/is_unique"] +is_between = ["polars-ops/is_between"] cross_join = ["polars-ops/cross_join"] asof_join = ["polars-core/asof_join", "polars-time", "polars-ops/asof_join"] concat_str = [] diff --git a/crates/polars-plan/src/dsl/function_expr/boolean.rs b/crates/polars-plan/src/dsl/function_expr/boolean.rs index b91a4e37ddd0..1ddef0b8d81a 100644 --- a/crates/polars-plan/src/dsl/function_expr/boolean.rs +++ b/crates/polars-plan/src/dsl/function_expr/boolean.rs @@ -28,6 +28,10 @@ pub enum BooleanFunction { IsUnique, #[cfg(feature = "is_unique")] IsDuplicated, + #[cfg(feature = "is_between")] + IsBetween { + closed: ClosedInterval, + }, #[cfg(feature = "is_in")] IsIn, AllHorizontal, @@ -61,6 +65,8 @@ impl Display for BooleanFunction { IsUnique => "is_unique", #[cfg(feature = "is_unique")] IsDuplicated => "is_duplicated", + #[cfg(feature = "is_between")] + IsBetween { .. } => "is_between", #[cfg(feature = "is_in")] IsIn => "is_in", AnyHorizontal => "any_horizontal", @@ -91,6 +97,8 @@ impl From for SpecialEq> { IsUnique => map!(is_unique), #[cfg(feature = "is_unique")] IsDuplicated => map!(is_duplicated), + #[cfg(feature = "is_between")] + IsBetween { closed } => map_as_slice!(is_between, closed), #[cfg(feature = "is_in")] IsIn => wrap!(is_in), AllHorizontal => map_as_slice!(all_horizontal), @@ -168,6 +176,14 @@ fn is_duplicated(s: &Series) -> PolarsResult { polars_ops::prelude::is_duplicated(s).map(|ca| ca.into_series()) } +#[cfg(feature = "is_between")] +fn is_between(s: &[Series], closed: ClosedInterval) -> PolarsResult { + let ser = &s[0]; + let lower = &s[1]; + let upper = &s[2]; + polars_ops::prelude::is_between(ser, lower, upper, closed).map(|ca| ca.into_series()) +} + #[cfg(feature = "is_in")] fn is_in(s: &mut [Series]) -> PolarsResult> { let left = &s[0]; diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index ee8b242de98a..492c7c45ba93 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -971,6 +971,17 @@ impl Expr { self.apply_private(BooleanFunction::IsDuplicated.into()) } + #[allow(clippy::wrong_self_convention)] + #[cfg(feature = "is_between")] + pub fn is_between>(self, lower: E, upper: E, closed: ClosedInterval) -> Self { + self.map_many_private( + BooleanFunction::IsBetween { closed }.into(), + &[lower.into(), upper.into()], + false, + true, + ) + } + /// Get a mask of unique values. #[allow(clippy::wrong_self_convention)] #[cfg(feature = "is_unique")] diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index cb79ee5c5f43..d0631bed6141 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -147,6 +147,7 @@ find_many = ["polars-plan/find_many"] fused = ["polars-ops/fused", "polars-lazy?/fused"] group_by_list = ["polars-core/group_by_list", "polars-ops/group_by_list"] interpolate = ["polars-ops/interpolate", "polars-lazy?/interpolate"] +is_between = ["polars-lazy?/is_between", "polars-ops/is_between"] is_first_distinct = ["polars-lazy?/is_first_distinct", "polars-ops/is_first_distinct"] is_in = ["polars-lazy?/is_in"] is_last_distinct = ["polars-lazy?/is_last_distinct", "polars-ops/is_last_distinct"] @@ -310,6 +311,7 @@ docs-selection = [ "checked_arithmetic", "ndarray", "repeat_by", + "is_between", "is_first_distinct", "is_last_distinct", "asof_join", diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index b21d813f99fc..415ee131d77d 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -240,6 +240,7 @@ //! - `repeat_by` - [Repeat element in an Array N times, where N is given by another array. //! - `is_first_distinct` - Check if element is first unique value. //! - `is_last_distinct` - Check if element is last unique value. +//! - `is_between` - Check if this expression is between the given lower and upper bounds. //! - `checked_arithmetic` - checked arithmetic/ returning [`None`] on invalid operations. //! - `dot_product` - Dot/inner product on [`Series`] and [`Expr`]. //! - `concat_str` - Concat string data in linear time. diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index 088e1609c9c1..9bc7e70f73bc 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -54,6 +54,7 @@ features = [ "is_first_distinct", "is_last_distinct", "is_unique", + "is_between", "lazy", "list_eval", "list_to_struct", diff --git a/py-polars/docs/source/reference/expressions/boolean.rst b/py-polars/docs/source/reference/expressions/boolean.rst index 73c68917d515..2575c6426ae6 100644 --- a/py-polars/docs/source/reference/expressions/boolean.rst +++ b/py-polars/docs/source/reference/expressions/boolean.rst @@ -17,6 +17,7 @@ Boolean Expr.is_infinite Expr.is_last Expr.is_last_distinct + Expr.is_between Expr.is_nan Expr.is_not Expr.is_not_nan diff --git a/py-polars/docs/source/reference/series/descriptive.rst b/py-polars/docs/source/reference/series/descriptive.rst index 6ec39e326b9f..b5c952deffbf 100644 --- a/py-polars/docs/source/reference/series/descriptive.rst +++ b/py-polars/docs/source/reference/series/descriptive.rst @@ -22,6 +22,7 @@ Descriptive Series.is_integer Series.is_last Series.is_last_distinct + Series.is_between Series.is_nan Series.is_not_nan Series.is_not_null diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index fb801ca6e5c9..e5eb450de989 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -5275,7 +5275,7 @@ def is_between( closed: ClosedInterval = "both", ) -> Self: """ - Check if this expression is between the given start and end values. + Check if this expression is between the given lower and upper bounds. Parameters ---------- @@ -5351,23 +5351,12 @@ def is_between( │ e ┆ false │ └─────┴────────────┘ """ - lower_bound = self._from_pyexpr(parse_as_expression(lower_bound)) - upper_bound = self._from_pyexpr(parse_as_expression(upper_bound)) - - if closed == "none": - return (self > lower_bound) & (self < upper_bound) - elif closed == "both": - return (self >= lower_bound) & (self <= upper_bound) - elif closed == "right": - return (self > lower_bound) & (self <= upper_bound) - elif closed == "left": - return (self >= lower_bound) & (self < upper_bound) - else: - msg = ( - "`closed` must be one of {'left', 'right', 'both', 'none'}," - f" got {closed!r}" - ) - raise ValueError(msg) + lower_bound = parse_as_expression(lower_bound) + upper_bound = parse_as_expression(upper_bound) + + return self._from_pyexpr( + self._pyexpr.is_between(lower_bound, upper_bound, closed) + ) def hash( self, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 2fe47f2c56b8..cc34abb4f69e 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4188,7 +4188,7 @@ def is_between( closed: ClosedInterval = "both", ) -> Series: """ - Get a boolean mask of the values that fall between the given start/end values. + Get a boolean mask of the values that are between the given lower/upper bounds. Parameters ---------- diff --git a/py-polars/src/conversion.rs b/py-polars/src/conversion.rs index 286decb67fed..be607cfb7973 100644 --- a/py-polars/src/conversion.rs +++ b/py-polars/src/conversion.rs @@ -1545,6 +1545,23 @@ impl FromPyObject<'_> for Wrap { } } +impl FromPyObject<'_> for Wrap { + fn extract(ob: &PyAny) -> PyResult { + let parsed = match ob.extract::<&str>()? { + "both" => ClosedInterval::Both, + "left" => ClosedInterval::Left, + "right" => ClosedInterval::Right, + "none" => ClosedInterval::None, + v => { + return Err(PyValueError::new_err(format!( + "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}", + ))) + }, + }; + Ok(Wrap(parsed)) + } +} + impl FromPyObject<'_> for Wrap { fn extract(ob: &PyAny) -> PyResult { let parsed = match ob.extract::<&str>()? { diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index ae545778d3c5..e7df96a30bfa 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -400,6 +400,13 @@ impl PyExpr { self.inner.clone().is_unique().into() } + fn is_between(&self, lower: Self, upper: Self, closed: Wrap) -> Self { + self.clone() + .inner + .is_between(lower.inner, upper.inner, closed.0) + .into() + } + fn approx_n_unique(&self) -> Self { self.inner.clone().approx_n_unique().into() } From a6d152de9f1ea4b50016d64d67926adecb429e3a Mon Sep 17 00:00:00 2001 From: petrosbar Date: Tue, 24 Oct 2023 22:11:04 +0300 Subject: [PATCH 2/3] be consistent with cloning PyExpr --- py-polars/src/expr/general.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index e7df96a30bfa..88f7915a1fbc 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -401,8 +401,8 @@ impl PyExpr { } fn is_between(&self, lower: Self, upper: Self, closed: Wrap) -> Self { - self.clone() - .inner + self.inner + .clone() .is_between(lower.inner, upper.inner, closed.0) .into() } From e6f27f1dd401cd407539a278f66fc17074de8e8a Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 11 Jan 2024 19:52:53 +0100 Subject: [PATCH 3/3] Remove duplicate doc entries --- py-polars/docs/source/reference/expressions/boolean.rst | 1 - py-polars/docs/source/reference/series/descriptive.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/py-polars/docs/source/reference/expressions/boolean.rst b/py-polars/docs/source/reference/expressions/boolean.rst index 2575c6426ae6..73c68917d515 100644 --- a/py-polars/docs/source/reference/expressions/boolean.rst +++ b/py-polars/docs/source/reference/expressions/boolean.rst @@ -17,7 +17,6 @@ Boolean Expr.is_infinite Expr.is_last Expr.is_last_distinct - Expr.is_between Expr.is_nan Expr.is_not Expr.is_not_nan diff --git a/py-polars/docs/source/reference/series/descriptive.rst b/py-polars/docs/source/reference/series/descriptive.rst index b5c952deffbf..6ec39e326b9f 100644 --- a/py-polars/docs/source/reference/series/descriptive.rst +++ b/py-polars/docs/source/reference/series/descriptive.rst @@ -22,7 +22,6 @@ Descriptive Series.is_integer Series.is_last Series.is_last_distinct - Series.is_between Series.is_nan Series.is_not_nan Series.is_not_null