Skip to content

Commit

Permalink
refactor: Make value_counts and unique_counts function expr non-anony…
Browse files Browse the repository at this point in the history
…mous (#11601)
  • Loading branch information
reswqa authored Oct 9, 2023
1 parent 311448a commit 0ae1b6e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 22 deletions.
11 changes: 11 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,14 @@ pub(super) fn replace_time_zone(s: &[Series], time_zone: Option<&str>) -> Polars
let s2 = &s[1].utf8().unwrap();
Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, s2)?.into_series())
}

/// Dispatch target for `FunctionExpr::ValueCounts`.
///
/// Forwards `sort` and `parallel` to the series' own `value_counts`, then
/// packs the result into a struct named after the input series and returns
/// it as a `Series`.
///
/// # Errors
///
/// Returns whatever error the underlying `value_counts` call produces.
#[cfg(feature = "dtype-struct")]
pub(super) fn value_counts(s: &Series, sort: bool, parallel: bool) -> PolarsResult<Series> {
    let counted = s.value_counts(sort, parallel)?;
    // The struct carries the name of the input series.
    let packed = counted.into_struct(s.name());
    Ok(packed.into_series())
}

/// Dispatch target for `FunctionExpr::UniqueCounts`.
///
/// Calls the series' `unique_counts` and returns the result converted into a
/// `Series`. This function never produces an `Err` itself; the `PolarsResult`
/// wrapper is always `Ok`.
#[cfg(feature = "unique_counts")]
pub(super) fn unique_counts(s: &Series) -> PolarsResult<Series> {
    let counts = s.unique_counts();
    Ok(counts.into_series())
}
15 changes: 15 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ pub enum FunctionExpr {
reverse: bool,
},
Reverse,
#[cfg(feature = "dtype-struct")]
ValueCounts {
sort: bool,
parallel: bool,
},
#[cfg(feature = "unique_counts")]
UniqueCounts,
Boolean(BooleanFunction),
#[cfg(feature = "approx_unique")]
ApproxNUnique,
Expand Down Expand Up @@ -349,6 +356,10 @@ impl Display for FunctionExpr {
Cumprod { .. } => "cumprod",
Cummin { .. } => "cummin",
Cummax { .. } => "cummax",
#[cfg(feature = "dtype-struct")]
ValueCounts { .. } => "value_counts",
#[cfg(feature = "unique_counts")]
UniqueCounts => "unique_counts",
Reverse => "reverse",
Boolean(func) => return write!(f, "{func}"),
#[cfg(feature = "approx_unique")]
Expand Down Expand Up @@ -628,6 +639,10 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
Cumprod { reverse } => map!(cum::cumprod, reverse),
Cummin { reverse } => map!(cum::cummin, reverse),
Cummax { reverse } => map!(cum::cummax, reverse),
#[cfg(feature = "dtype-struct")]
ValueCounts { sort, parallel } => map!(dispatch::value_counts, sort, parallel),
#[cfg(feature = "unique_counts")]
UniqueCounts => map!(dispatch::unique_counts),
Reverse => map!(dispatch::reverse),
Boolean(func) => func.into(),
#[cfg(feature = "approx_unique")]
Expand Down
9 changes: 9 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ impl FunctionExpr {
StructExpr(s) => s.get_field(mapper),
#[cfg(feature = "top_k")]
TopK(_) => mapper.with_same_dtype(),
#[cfg(feature = "dtype-struct")]
ValueCounts { .. } => mapper.map_dtype(|dt| {
DataType::Struct(vec![
Field::new(fields[0].name().as_str(), dt.clone()),
Field::new("counts", IDX_DTYPE),
])
}),
#[cfg(feature = "unique_counts")]
UniqueCounts => mapper.with_dtype(IDX_DTYPE),
Shift(..) | Reverse => mapper.with_same_dtype(),
Boolean(func) => func.get_field(mapper),
#[cfg(feature = "dtype-categorical")]
Expand Down
28 changes: 6 additions & 22 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1754,35 +1754,19 @@ impl Expr {
/// Count all unique values and create a struct mapping value to count.
/// (Note that it is better to turn parallel off in the aggregation context).
pub fn value_counts(self, sort: bool, parallel: bool) -> Self {
self.apply(
move |s| {
s.value_counts(sort, parallel)
.map(|df| Some(df.into_struct(s.name()).into_series()))
},
GetOutput::map_field(|fld| {
Field::new(
fld.name(),
DataType::Struct(vec![fld.clone(), Field::new("counts", IDX_DTYPE)]),
)
}),
)
.with_function_options(|mut opts| {
opts.pass_name_to_apply = true;
opts
})
.with_fmt("value_counts")
self.apply_private(FunctionExpr::ValueCounts { sort, parallel })
.with_function_options(|mut opts| {
opts.pass_name_to_apply = true;
opts
})
}

#[cfg(feature = "unique_counts")]
/// Returns a count of the unique values in the order of appearance.
/// This method differs from [`Expr::value_counts`] in that it does not return the
/// values, only the counts, and might be faster.
pub fn unique_counts(self) -> Self {
self.apply(
|s| Ok(Some(s.unique_counts().into_series())),
GetOutput::from_type(IDX_DTYPE),
)
.with_fmt("unique_counts")
self.apply_private(FunctionExpr::UniqueCounts)
}

#[cfg(feature = "log")]
Expand Down

0 comments on commit 0ae1b6e

Please sign in to comment.