Skip to content

Commit

Permalink
feat: Implement is_between in Rust (pola-rs#11945)
Browse files Browse the repository at this point in the history
Co-authored-by: Stijn de Gooijer <stijndegooijer@gmail.com>
  • Loading branch information
2 people authored and r-brink committed Jan 24, 2024
1 parent 88d14f5 commit 19adb7e
Show file tree
Hide file tree
Showing 14 changed files with 104 additions and 19 deletions.
1 change: 1 addition & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ repeat_by = ["polars-plan/repeat_by"]
round_series = ["polars-plan/round_series", "polars-ops/round_series"]
is_first_distinct = ["polars-plan/is_first_distinct"]
is_last_distinct = ["polars-plan/is_last_distinct"]
is_between = ["polars-plan/is_between"]
is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ is_first_distinct = []
is_last_distinct = []
is_unique = []
unique_counts = []
is_between = []
approx_unique = []
fused = []
cutqcut = ["dtype-categorical", "dtype-struct"]
Expand Down
34 changes: 34 additions & 0 deletions crates/polars-ops/src/series/ops/is_between.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use std::ops::BitAnd;

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Selects which bounds of an interval are inclusive when testing membership
/// with `is_between`.
///
/// The default is [`ClosedInterval::Both`].
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ClosedInterval {
/// Lower and upper bounds are both inclusive (`lower <= x <= upper`).
#[default]
Both,
/// Only the lower bound is inclusive (`lower <= x < upper`).
Left,
/// Only the upper bound is inclusive (`lower < x <= upper`).
Right,
/// Neither bound is inclusive (`lower < x < upper`).
None,
}

/// Builds a boolean mask marking the values of `s` that lie between `lower`
/// and `upper`.
///
/// `closed` decides which of the two bounds is inclusive: a bound that is
/// closed uses `>=` / `<=`, an open bound uses `>` / `<`.
///
/// # Errors
///
/// Returns the error of the lower-bound comparison if it fails, otherwise the
/// error of the upper-bound comparison if that one fails.
pub fn is_between(
    s: &Series,
    lower: &Series,
    upper: &Series,
    closed: ClosedInterval,
) -> PolarsResult<BooleanChunked> {
    // Lower-bound check: inclusive only when the interval is closed on the left.
    let above_lower = match closed {
        ClosedInterval::Both | ClosedInterval::Left => Series::gt_eq(s, lower),
        ClosedInterval::Right | ClosedInterval::None => Series::gt(s, lower),
    }?;

    // Upper-bound check: inclusive only when the interval is closed on the right.
    let below_upper = match closed {
        ClosedInterval::Both | ClosedInterval::Right => Series::lt_eq(s, upper),
        ClosedInterval::Left | ClosedInterval::None => Series::lt(s, upper),
    }?;

    // A value is inside the interval when both checks hold.
    Ok(BitAnd::bitand(above_lower, below_upper))
}
4 changes: 4 additions & 0 deletions crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ mod floor_divide;
mod fused;
mod horizontal;
mod index;
#[cfg(feature = "is_between")]
mod is_between;
#[cfg(feature = "is_first_distinct")]
mod is_first_distinct;
#[cfg(feature = "is_in")]
Expand Down Expand Up @@ -74,6 +76,8 @@ pub use floor_divide::*;
pub use fused::*;
pub use horizontal::*;
pub use index::*;
#[cfg(feature = "is_between")]
pub use is_between::*;
#[cfg(feature = "is_first_distinct")]
pub use is_first_distinct::*;
#[cfg(feature = "is_in")]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ round_series = ["polars-ops/round_series"]
is_first_distinct = ["polars-core/is_first_distinct", "polars-ops/is_first_distinct"]
is_last_distinct = ["polars-core/is_last_distinct", "polars-ops/is_last_distinct"]
is_unique = ["polars-ops/is_unique"]
is_between = ["polars-ops/is_between"]
cross_join = ["polars-ops/cross_join"]
asof_join = ["polars-core/asof_join", "polars-time", "polars-ops/asof_join"]
concat_str = []
Expand Down
16 changes: 16 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ pub enum BooleanFunction {
IsUnique,
#[cfg(feature = "is_unique")]
IsDuplicated,
#[cfg(feature = "is_between")]
IsBetween {
closed: ClosedInterval,
},
#[cfg(feature = "is_in")]
IsIn,
AllHorizontal,
Expand Down Expand Up @@ -61,6 +65,8 @@ impl Display for BooleanFunction {
IsUnique => "is_unique",
#[cfg(feature = "is_unique")]
IsDuplicated => "is_duplicated",
#[cfg(feature = "is_between")]
IsBetween { .. } => "is_between",
#[cfg(feature = "is_in")]
IsIn => "is_in",
AnyHorizontal => "any_horizontal",
Expand Down Expand Up @@ -91,6 +97,8 @@ impl From<BooleanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
IsUnique => map!(is_unique),
#[cfg(feature = "is_unique")]
IsDuplicated => map!(is_duplicated),
#[cfg(feature = "is_between")]
IsBetween { closed } => map_as_slice!(is_between, closed),
#[cfg(feature = "is_in")]
IsIn => wrap!(is_in),
AllHorizontal => map_as_slice!(all_horizontal),
Expand Down Expand Up @@ -168,6 +176,14 @@ fn is_duplicated(s: &Series) -> PolarsResult<Series> {
polars_ops::prelude::is_duplicated(s).map(|ca| ca.into_series())
}

/// Adapter used by the `IsBetween` boolean function: unpacks the input slice
/// as `[values, lower, upper]` and forwards to `polars_ops::prelude::is_between`,
/// converting the resulting mask into a `Series`.
///
/// Indexing panics if fewer than three series are supplied.
#[cfg(feature = "is_between")]
fn is_between(s: &[Series], closed: ClosedInterval) -> PolarsResult<Series> {
    let mask = polars_ops::prelude::is_between(&s[0], &s[1], &s[2], closed)?;
    Ok(mask.into_series())
}

#[cfg(feature = "is_in")]
fn is_in(s: &mut [Series]) -> PolarsResult<Option<Series>> {
let left = &s[0];
Expand Down
11 changes: 11 additions & 0 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,17 @@ impl Expr {
self.apply_private(BooleanFunction::IsDuplicated.into())
}

/// Check if this expression is between the given lower and upper bounds.
///
/// `closed` selects which of the bounds are inclusive.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_between")]
pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
    let function = BooleanFunction::IsBetween { closed }.into();
    // The bounds are passed as the additional inputs, lower first.
    let bounds = [lower.into(), upper.into()];
    // NOTE(review): the trailing `false, true` flags are interpreted by
    // `map_many_private`; confirm their meaning against its signature.
    self.map_many_private(function, &bounds, false, true)
}

/// Get a mask of unique values.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
Expand Down
2 changes: 2 additions & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ find_many = ["polars-plan/find_many"]
fused = ["polars-ops/fused", "polars-lazy?/fused"]
group_by_list = ["polars-core/group_by_list", "polars-ops/group_by_list"]
interpolate = ["polars-ops/interpolate", "polars-lazy?/interpolate"]
is_between = ["polars-lazy?/is_between", "polars-ops/is_between"]
is_first_distinct = ["polars-lazy?/is_first_distinct", "polars-ops/is_first_distinct"]
is_in = ["polars-lazy?/is_in"]
is_last_distinct = ["polars-lazy?/is_last_distinct", "polars-ops/is_last_distinct"]
Expand Down Expand Up @@ -310,6 +311,7 @@ docs-selection = [
"checked_arithmetic",
"ndarray",
"repeat_by",
"is_between",
"is_first_distinct",
"is_last_distinct",
"asof_join",
Expand Down
1 change: 1 addition & 0 deletions crates/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@
//! - `repeat_by` - Repeat element in an Array N times, where N is given by another array.
//! - `is_first_distinct` - Check if element is first unique value.
//! - `is_last_distinct` - Check if element is last unique value.
//! - `is_between` - Check if this expression is between the given lower and upper bounds.
//! - `checked_arithmetic` - checked arithmetic/ returning [`None`] on invalid operations.
//! - `dot_product` - Dot/inner product on [`Series`] and [`Expr`].
//! - `concat_str` - Concat string data in linear time.
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ features = [
"is_first_distinct",
"is_last_distinct",
"is_unique",
"is_between",
"lazy",
"list_eval",
"list_to_struct",
Expand Down
25 changes: 7 additions & 18 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5275,7 +5275,7 @@ def is_between(
closed: ClosedInterval = "both",
) -> Self:
"""
Check if this expression is between the given start and end values.
Check if this expression is between the given lower and upper bounds.
Parameters
----------
Expand Down Expand Up @@ -5351,23 +5351,12 @@ def is_between(
│ e ┆ false │
└─────┴────────────┘
"""
lower_bound = self._from_pyexpr(parse_as_expression(lower_bound))
upper_bound = self._from_pyexpr(parse_as_expression(upper_bound))

if closed == "none":
return (self > lower_bound) & (self < upper_bound)
elif closed == "both":
return (self >= lower_bound) & (self <= upper_bound)
elif closed == "right":
return (self > lower_bound) & (self <= upper_bound)
elif closed == "left":
return (self >= lower_bound) & (self < upper_bound)
else:
msg = (
"`closed` must be one of {'left', 'right', 'both', 'none'},"
f" got {closed!r}"
)
raise ValueError(msg)
lower_bound = parse_as_expression(lower_bound)
upper_bound = parse_as_expression(upper_bound)

return self._from_pyexpr(
self._pyexpr.is_between(lower_bound, upper_bound, closed)
)

def hash(
self,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4188,7 +4188,7 @@ def is_between(
closed: ClosedInterval = "both",
) -> Series:
"""
Get a boolean mask of the values that fall between the given start/end values.
Get a boolean mask of the values that are between the given lower/upper bounds.
Parameters
----------
Expand Down
17 changes: 17 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1545,6 +1545,23 @@ impl FromPyObject<'_> for Wrap<SearchSortedSide> {
}
}

impl FromPyObject<'_> for Wrap<ClosedInterval> {
    /// Converts a Python string (`"both"`, `"left"`, `"right"` or `"none"`)
    /// into the matching `ClosedInterval`, wrapped in `Wrap`.
    ///
    /// Fails with the extraction error if `ob` is not a string, and with a
    /// `PyValueError` if the string is not one of the accepted names.
    fn extract(ob: &PyAny) -> PyResult<Self> {
        let name = ob.extract::<&str>()?;
        match name {
            "both" => Ok(Wrap(ClosedInterval::Both)),
            "left" => Ok(Wrap(ClosedInterval::Left)),
            "right" => Ok(Wrap(ClosedInterval::Right)),
            "none" => Ok(Wrap(ClosedInterval::None)),
            v => Err(PyValueError::new_err(format!(
                "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
            ))),
        }
    }
}

impl FromPyObject<'_> for Wrap<WindowMapping> {
fn extract(ob: &PyAny) -> PyResult<Self> {
let parsed = match ob.extract::<&str>()? {
Expand Down
7 changes: 7 additions & 0 deletions py-polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,13 @@ impl PyExpr {
self.inner.clone().is_unique().into()
}

fn is_between(&self, lower: Self, upper: Self, closed: Wrap<ClosedInterval>) -> Self {
self.inner
.clone()
.is_between(lower.inner, upper.inner, closed.0)
.into()
}

fn approx_n_unique(&self) -> Self {
self.inner.clone().approx_n_unique().into()
}
Expand Down

0 comments on commit 19adb7e

Please sign in to comment.