Skip to content

Commit

Permalink
Added rle and rle_id methods to Series and Expr
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Nov 21, 2023
1 parent 844cf23 commit 135fdc7
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## 0.7.1 (unreleased)

- Added `cut` and `qcut` methods to `Series` and `Expr`
- Added `rle` and `rle_id` methods to `Series` and `Expr`
- Added `bottom_k` method to `Series`
- Fixed error with `top_k` method

Expand Down
1 change: 1 addition & 0 deletions ext/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ features = [
"range",
"reinterpret",
"repeat_by",
"rle",
"rolling_window",
"round_series",
"row_hash",
Expand Down
8 changes: 8 additions & 0 deletions ext/polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ impl RbExpr {
.into()
}

/// Applies `rle` to a clone of the wrapped expression and converts the
/// result back into `Self`.
pub fn rle(&self) -> Self {
    let expr = self.inner.clone();
    expr.rle().into()
}

/// Applies `rle_id` to a clone of the wrapped expression and converts the
/// result back into `Self`.
pub fn rle_id(&self) -> Self {
    let expr = self.inner.clone();
    expr.rle_id().into()
}

/// Applies `agg_groups` to the inner expression of a cloned wrapper and
/// converts the result back into `Self`.
pub fn agg_groups(&self) -> Self {
    // Clone the whole wrapper first, then move its inner expression out.
    let expr = self.clone().inner;
    expr.agg_groups().into()
}
Expand Down
2 changes: 2 additions & 0 deletions ext/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
class.define_method("cut", method!(RbExpr::cut, 4))?;
class.define_method("qcut", method!(RbExpr::qcut, 5))?;
class.define_method("qcut_uniform", method!(RbExpr::qcut_uniform, 5))?;
class.define_method("rle", method!(RbExpr::rle, 0))?;
class.define_method("rle_id", method!(RbExpr::rle_id, 0))?;
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
class.define_method("count", method!(RbExpr::count, 0))?;
class.define_method("value_counts", method!(RbExpr::value_counts, 2))?;
Expand Down
53 changes: 53 additions & 0 deletions lib/polars/expr.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2625,6 +2625,59 @@ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, in
wrap_expr(rbexpr)
end

# Run-length encode the expression.
#
# Consecutive identical values are collapsed into runs. As the example shows,
# unnesting the result yields a `lengths` column and a `values` column.
#
# @return [Expr]
#
# @example
#   df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
#   df.select(Polars.col("s").rle).unnest("s")
#   # =>
#   # shape: (6, 2)
#   # ┌─────────┬────────┐
#   # │ lengths ┆ values │
#   # │ ---     ┆ ---    │
#   # │ i32     ┆ i64    │
#   # ╞═════════╪════════╡
#   # │ 2       ┆ 1      │
#   # │ 1       ┆ 2      │
#   # │ 1       ┆ 1      │
#   # │ 1       ┆ null   │
#   # │ 1       ┆ 1      │
#   # │ 2       ┆ 3      │
#   # └─────────┴────────┘
def rle
  encoded = _rbexpr.rle
  wrap_expr(encoded)
end

# Assign each value the ID of the run it belongs to.
#
# Related to RLE, but instead of collapsing runs it labels every value with
# the ID of the run into which it falls. This is handy when groups should be
# defined by runs of identical values rather than by the values themselves.
#
# @return [Expr]
#
# @example
#   df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
#   df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
#   # =>
#   # shape: (5, 4)
#   # ┌─────┬──────┬─────┬──────┐
#   # │ a   ┆ b    ┆ a_r ┆ ab_r │
#   # │ --- ┆ ---  ┆ --- ┆ ---  │
#   # │ i64 ┆ str  ┆ u32 ┆ u32  │
#   # ╞═════╪══════╪═════╪══════╡
#   # │ 1   ┆ x    ┆ 0   ┆ 0    │
#   # │ 2   ┆ x    ┆ 1   ┆ 1    │
#   # │ 1   ┆ null ┆ 2   ┆ 2    │
#   # │ 1   ┆ y    ┆ 2   ┆ 3    │
#   # │ 1   ┆ y    ┆ 2   ┆ 3    │
#   # └─────┴──────┴─────┴──────┘
def rle_id
  run_ids = _rbexpr.rle_id
  wrap_expr(run_ids)
end

# Filter a single column.
#
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
Expand Down
53 changes: 53 additions & 0 deletions lib/polars/series.rb
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,59 @@ def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, in
result
end

# Get the lengths of runs of identical values.
#
# As the example shows, the result can be unnested via `struct.unnest` into a
# `lengths` column and a `values` column.
#
# @return [Series]
#
# @example
#   s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
#   s.rle.struct.unnest
#   # =>
#   # shape: (6, 2)
#   # ┌─────────┬────────┐
#   # │ lengths ┆ values │
#   # │ ---     ┆ ---    │
#   # │ i32     ┆ i64    │
#   # ╞═════════╪════════╡
#   # │ 2       ┆ 1      │
#   # │ 1       ┆ 2      │
#   # │ 1       ┆ 1      │
#   # │ 1       ┆ null   │
#   # │ 1       ┆ 1      │
#   # │ 2       ┆ 3      │
#   # └─────────┴────────┘
def rle
# Delegates to `super`; the implementation it resolves to is outside this
# excerpt. NOTE(review): presumably dispatched to the Expr version - confirm.
super
end

# Map values to run IDs.
#
# Similar to RLE, but it maps each value to an ID corresponding to the run into
# which it falls. This is especially useful when you want to define groups by
# runs of identical values rather than the values themselves.
#
# @return [Series]
#
# @example
#   s = Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3])
#   s.rle_id
#   # =>
#   # shape: (8,)
#   # Series: 's' [u32]
#   # [
#   #         0
#   #         0
#   #         1
#   #         2
#   #         3
#   #         4
#   #         5
#   #         5
#   # ]
def rle_id
# Delegates to `super`; the implementation it resolves to is outside this
# excerpt. NOTE(review): presumably dispatched to the Expr version - confirm.
super
end

# Count the unique values in a Series.
#
# @param sort [Boolean]
Expand Down

0 comments on commit 135fdc7

Please sign in to comment.