diff --git a/crates/polars-plan/src/dsl/function_expr/dispatch.rs b/crates/polars-plan/src/dsl/function_expr/dispatch.rs index d0f652c7be59..7b17be78b571 100644 --- a/crates/polars-plan/src/dsl/function_expr/dispatch.rs +++ b/crates/polars-plan/src/dsl/function_expr/dispatch.rs @@ -40,3 +40,14 @@ pub(super) fn replace_time_zone(s: &[Series], time_zone: Option<&str>) -> Polars let s2 = &s[1].utf8().unwrap(); Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, s2)?.into_series()) } + +#[cfg(feature = "dtype-struct")] +pub(super) fn value_counts(s: &Series, sort: bool, parallel: bool) -> PolarsResult { + s.value_counts(sort, parallel) + .map(|df| df.into_struct(s.name()).into_series()) +} + +#[cfg(feature = "unique_counts")] +pub(super) fn unique_counts(s: &Series) -> PolarsResult { + Ok(s.unique_counts().into_series()) +} diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 1fdcba2d0111..7e900543d286 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -166,6 +166,13 @@ pub enum FunctionExpr { reverse: bool, }, Reverse, + #[cfg(feature = "dtype-struct")] + ValueCounts { + sort: bool, + parallel: bool, + }, + #[cfg(feature = "unique_counts")] + UniqueCounts, Boolean(BooleanFunction), #[cfg(feature = "approx_unique")] ApproxNUnique, @@ -349,6 +356,10 @@ impl Display for FunctionExpr { Cumprod { .. } => "cumprod", Cummin { .. } => "cummin", Cummax { .. } => "cummax", + #[cfg(feature = "dtype-struct")] + ValueCounts { .. 
} => "value_counts", + #[cfg(feature = "unique_counts")] + UniqueCounts => "unique_counts", Reverse => "reverse", Boolean(func) => return write!(f, "{func}"), #[cfg(feature = "approx_unique")] @@ -628,6 +639,10 @@ impl From for SpecialEq> { Cumprod { reverse } => map!(cum::cumprod, reverse), Cummin { reverse } => map!(cum::cummin, reverse), Cummax { reverse } => map!(cum::cummax, reverse), + #[cfg(feature = "dtype-struct")] + ValueCounts { sort, parallel } => map!(dispatch::value_counts, sort, parallel), + #[cfg(feature = "unique_counts")] + UniqueCounts => map!(dispatch::unique_counts), Reverse => map!(dispatch::reverse), Boolean(func) => func.into(), #[cfg(feature = "approx_unique")] diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index bc5c435b99cd..707aec88edee 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -154,6 +154,15 @@ impl FunctionExpr { StructExpr(s) => s.get_field(mapper), #[cfg(feature = "top_k")] TopK(_) => mapper.with_same_dtype(), + #[cfg(feature = "dtype-struct")] + ValueCounts { .. } => mapper.map_dtype(|dt| { + DataType::Struct(vec![ + Field::new(fields[0].name().as_str(), dt.clone()), + Field::new("counts", IDX_DTYPE), + ]) + }), + #[cfg(feature = "unique_counts")] + UniqueCounts => mapper.with_dtype(IDX_DTYPE), Shift(..) | Reverse => mapper.with_same_dtype(), Boolean(func) => func.get_field(mapper), #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 0cd6d7673e7e..6b2e00ca5e33 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1754,23 +1754,11 @@ impl Expr { /// Count all unique values and create a struct mapping value to count. /// (Note that it is better to turn parallel off in the aggregation context). 
pub fn value_counts(self, sort: bool, parallel: bool) -> Self { - self.apply( - move |s| { - s.value_counts(sort, parallel) - .map(|df| Some(df.into_struct(s.name()).into_series())) - }, - GetOutput::map_field(|fld| { - Field::new( - fld.name(), - DataType::Struct(vec![fld.clone(), Field::new("counts", IDX_DTYPE)]), - ) - }), - ) - .with_function_options(|mut opts| { - opts.pass_name_to_apply = true; - opts - }) - .with_fmt("value_counts") + self.apply_private(FunctionExpr::ValueCounts { sort, parallel }) + .with_function_options(|mut opts| { + opts.pass_name_to_apply = true; + opts + }) } #[cfg(feature = "unique_counts")] @@ -1778,11 +1766,7 @@ impl Expr { /// This method differs from [`Expr::value_counts`] in that it does not return the /// values, only the counts and might be faster. pub fn unique_counts(self) -> Self { - self.apply( - |s| Ok(Some(s.unique_counts().into_series())), - GetOutput::from_type(IDX_DTYPE), - ) - .with_fmt("unique_counts") + self.apply_private(FunctionExpr::UniqueCounts) } #[cfg(feature = "log")]