Commit 06a901b
Changed groupby to group_by for examples
ankane committed Oct 31, 2023
1 parent 6d05a3d commit 06a901b
Showing 13 changed files with 108 additions and 106 deletions.
40 changes: 20 additions & 20 deletions lib/polars/data_frame.rb
@@ -1791,13 +1791,13 @@ def with_row_count(name: "row_nr", offset: 0)
_from_rbdf(_df.with_row_count(name, offset))
end

-# Start a groupby operation.
+# Start a group by operation.
#
# @param by [Object]
# Column(s) to group by.
# @param maintain_order [Boolean]
# Make sure that the order of the groups remain consistent. This is more
-# expensive than a default groupby. Note that this only works in expression
+# expensive than a default group by. Note that this only works in expression
# aggregations.
#
# @return [GroupBy]
@@ -1810,7 +1810,7 @@ def with_row_count(name: "row_nr", offset: 0)
# "c" => [6, 5, 4, 3, 2, 1]
# }
# )
# df.groupby("a").agg(Polars.col("b").sum).sort("a")
# df.group_by("a").agg(Polars.col("b").sum).sort("a")
# # =>
# # shape: (3, 2)
# # ┌─────┬─────┐
Expand All @@ -1824,7 +1824,7 @@ def with_row_count(name: "row_nr", offset: 0)
# # └─────┴─────┘
def group_by(by, maintain_order: false)
if !Utils.bool?(maintain_order)
-raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
+raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
end
GroupBy.new(
self,
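A quick sketch of the renamed method and its guard (illustrative data, not from the diff):

    require "polars-df"

    df = Polars::DataFrame.new({"a" => ["x", "y", "x"], "b" => [1, 2, 3]})

    # a non-Boolean maintain_order trips the guard above
    df.group_by("a", maintain_order: "yes")
    # => TypeError: invalid input for group_by arg `maintain_order`: yes.

    # with a Boolean, a GroupBy is returned for .agg to consume
    df.group_by("a", maintain_order: true).agg(Polars.col("b").sum)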
@@ -1839,9 +1839,9 @@ def group_by(by, maintain_order: false)
#
# Also works for index values of type `:i32` or `:i64`.
#
-# Different from a `dynamic_groupby` the windows are now determined by the
+# Different from a `dynamic_group_by` the windows are now determined by the
# individual values and are not of constant intervals. For constant intervals use
-# *groupby_dynamic*
+# *group_by_dynamic*
#
# The `period` and `offset` arguments are created either from a timedelta, or
# by using the following string language:
@@ -1861,7 +1861,7 @@ def group_by(by, maintain_order: false)
# Or combine them:
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
#
-# In case of a groupby_rolling on an integer column, the windows are defined by:
+# In case of a group_by_rolling on an integer column, the windows are defined by:
#
# - **"1i" # length 1**
# - **"10i" # length 10**
@@ -1872,7 +1872,7 @@ def group_by(by, maintain_order: false)
# This column must be sorted in ascending order. If not the output will not
# make sense.
#
-# In case of a rolling groupby on indices, dtype needs to be one of
+# In case of a rolling group by on indices, dtype needs to be one of
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
# performance matters use an `:i64` column.
# @param period [Object]
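And a minimal sketch of the integer-index case (column names are illustrative; the index is flagged sorted the same way the `dt` example below does):

    require "polars-df"

    # integers parse as :i64 by default, avoiding the temporary :i32 cast
    df = Polars::DataFrame.new({"idx" => [0, 1, 2, 3, 4, 5], "a" => [3, 7, 5, 9, 2, 1]})
      .with_column(Polars.col("idx").set_sorted)

    # "3i" looks back over a window of 3 index values ending at each row
    df.group_by_rolling(index_column: "idx", period: "3i").agg(
      Polars.sum("a").alias("sum_a")
    )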
@@ -1904,7 +1904,7 @@ def group_by(by, maintain_order: false)
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
# )
-# df.groupby_rolling(index_column: "dt", period: "2d").agg(
+# df.group_by_rolling(index_column: "dt", period: "2d").agg(
# [
# Polars.sum("a").alias("sum_a"),
# Polars.min("a").alias("min_a"),
@@ -1940,7 +1940,7 @@ def group_by_rolling(
# Group based on a time value (or index value of type `:i32`, `:i64`).
#
# Time windows are calculated and rows are assigned to windows. Different from a
-# normal groupby is that a row can be member of multiple groups. The time/index
+# normal group by is that a row can be member of multiple groups. The time/index
# window could be seen as a rolling window, with a window size determined by
# dates/times/values instead of slots in the DataFrame.
#
@@ -1968,7 +1968,7 @@ def group_by_rolling(
# Or combine them:
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
#
-# In case of a groupby_dynamic on an integer column, the windows are defined by:
+# In case of a group_by_dynamic on an integer column, the windows are defined by:
#
# - "1i" # length 1
# - "10i" # length 10
@@ -1979,7 +1979,7 @@ def group_by_rolling(
# This column must be sorted in ascending order. If not the output will not
# make sense.
#
-# In case of a dynamic groupby on indices, dtype needs to be one of
+# In case of a dynamic group by on indices, dtype needs to be one of
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
# performance matters use an `:i64` column.
# @param every
@@ -2030,7 +2030,7 @@ def group_by_rolling(
# # └─────────────────────┴─────┘
#
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
# df.groupby_dynamic("time", every: "1h", closed: "right").agg(
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
# [
# Polars.col("time").min.alias("time_min"),
# Polars.col("time").max.alias("time_max")
@@ -2050,7 +2050,7 @@ def group_by_rolling(
# # └─────────────────────┴─────────────────────┴─────────────────────┘
#
# @example The window boundaries can also be added to the aggregation result.
-# df.groupby_dynamic(
+# df.group_by_dynamic(
# "time", every: "1h", include_boundaries: true, closed: "right"
# ).agg([Polars.col("time").count.alias("time_count")])
# # =>
@@ -2067,7 +2067,7 @@ def group_by_rolling(
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
#
# @example When closed="left", should not include right end of interval.
# df.groupby_dynamic("time", every: "1h", closed: "left").agg(
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
# [
# Polars.col("time").count.alias("time_count"),
# Polars.col("time").alias("time_agg_list")
@@ -2087,7 +2087,7 @@ def group_by_rolling(
# # └─────────────────────┴────────────┴───────────────────────────────────┘
#
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
# df.groupby_dynamic("time", every: "1h", closed: "both").agg(
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
# [Polars.col("time").count.alias("time_count")]
# )
# # =>
@@ -2104,7 +2104,7 @@ def group_by_rolling(
# # │ 2021-12-16 03:00:00 ┆ 1 │
# # └─────────────────────┴────────────┘
#
-# @example Dynamic groupbys can also be combined with grouping on normal keys.
+# @example Dynamic group bys can also be combined with grouping on normal keys.
# df = Polars::DataFrame.new(
# {
# "time" => Polars.date_range(
@@ -2115,7 +2115,7 @@ def group_by_rolling(
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
# }
# )
-# df.groupby_dynamic(
+# df.group_by_dynamic(
# "time",
# every: "1h",
# closed: "both",
@@ -2138,14 +2138,14 @@ def group_by_rolling(
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
#
-# @example Dynamic groupby on an index column.
+# @example Dynamic group by on an index column.
# df = Polars::DataFrame.new(
# {
# "idx" => Polars.arange(0, 6, eager: true),
# "A" => ["A", "A", "B", "B", "B", "C"]
# }
# )
-# df.groupby_dynamic(
+# df.group_by_dynamic(
# "idx",
# every: "2i",
# period: "3i",
2 changes: 1 addition & 1 deletion lib/polars/dynamic_group_by.rb
@@ -2,7 +2,7 @@ module Polars
# A dynamic grouper.
#
# This has an `.agg` method which allows you to run all polars expressions in a
-# groupby context.
+# group by context.
class DynamicGroupBy
def initialize(
df,
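`DynamicGroupBy` is not constructed directly; it comes back from `DataFrame#group_by_dynamic`, so usage looks roughly like this (a sketch, assuming a frame with a sorted "time" column and a numeric "value" column):

    # .agg evaluates polars expressions per dynamic window
    df.group_by_dynamic("time", every: "1h").agg(
      Polars.col("value").mean.alias("value_mean")
    )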
36 changes: 18 additions & 18 deletions lib/polars/expr.rb
@@ -689,7 +689,7 @@ def is_not_nan
# "value" => [94, 95, 96, 97, 97, 99]
# }
# )
# df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
# df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
# # =>
# # shape: (2, 2)
# # ┌───────┬───────────┐
@@ -1236,7 +1236,7 @@ def cast(dtype, strict: true)

# Sort this column. In projection/ selection context the whole column is sorted.
#
-# If used in a groupby context, the groups are sorted.
+# If used in a group by context, the groups are sorted.
#
# @param reverse [Boolean]
# false -> order from small to large.
@@ -1294,7 +1294,7 @@ def cast(dtype, strict: true)
# # └───────┘
#
# @example
# df.groupby("group").agg(Polars.col("value").sort)
# df.group_by("group").agg(Polars.col("value").sort)
# # =>
# # shape: (2, 2)
# # ┌───────┬────────────┐
@@ -1503,7 +1503,7 @@ def search_sorted(element, side: "any")
# Sort this column by the ordering of another column, or multiple other columns.
#
# In projection/ selection context the whole column is sorted.
-# If used in a groupby context, the groups are sorted.
+# If used in a group by context, the groups are sorted.
#
# @param by [Object]
# The column(s) used for sorting.
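A minimal sketch of that behavior (the method name sits outside the hunk, but this reads like the doc for `Expr#sort_by`, which is an assumption here), with a hypothetical frame:

    require "polars-df"

    df = Polars::DataFrame.new({"group" => ["b", "a", "b", "a"], "value" => [3, 1, 4, 2]})
    # "group" reordered by ascending "value": ["a", "a", "b", "b"]
    df.select(Polars.col("group").sort_by("value"))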
@@ -2210,7 +2210,7 @@ def last

# Apply window function over a subgroup.
#
-# This is similar to a groupby + aggregation + self join.
+# This is similar to a group by + aggregation + self join.
# Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
#
# @param expr [Object]
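The self-join analogy is easiest to see in a small sketch (hypothetical columns):

    require "polars-df"

    df = Polars::DataFrame.new({"group" => ["a", "a", "b"], "value" => [1, 2, 5]})
    # every row receives its group's sum, like group by + agg + self join
    df.with_column(Polars.col("value").sum.over("group").alias("group_sum"))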
@@ -2485,7 +2485,7 @@ def quantile(quantile, interpolation: "nearest")
# }
# )
# (
# df.groupby("group_col").agg(
# df.group_by("group_col").agg(
# [
# Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
# Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
@@ -2523,7 +2523,7 @@ def filter(predicate)
# }
# )
# (
# df.groupby("group_col").agg(
# df.group_by("group_col").agg(
# [
# Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
# Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
@@ -2641,7 +2641,7 @@ def where(predicate)
#
# @example In a GroupBy context the function is applied by group:
# df.lazy
# .groupby("b", maintain_order: true)
# .group_by("b", maintain_order: true)
# .agg(
# [
# Polars.col("a").apply { |x| x.sum }
@@ -2680,7 +2680,7 @@ def where(predicate)
# "values" => [[1, 2], [2, 3], [4]]
# }
# )
# df.groupby("group").agg(Polars.col("values").flatten)
# df.group_by("group").agg(Polars.col("values").flatten)
# # =>
# # shape: (2, 2)
# # ┌───────┬───────────┐
@@ -3170,7 +3170,7 @@ def interpolate(method: "linear")
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
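The note's suggestion, sketched against a frame like the `dt`/`a` one in the `group_by_rolling` example earlier: one rolling-window pass feeds several aggregations, instead of separate `rolling_min`/`rolling_max` calls:

    df.group_by_rolling(index_column: "dt", period: "2d").agg(
      [
        Polars.min("a").alias("min_a"),
        Polars.max("a").alias("max_a")
      ]
    )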
@@ -3259,7 +3259,7 @@ def rolling_min(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3348,7 +3348,7 @@ def rolling_max(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3437,7 +3437,7 @@ def rolling_mean(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3526,7 +3526,7 @@ def rolling_sum(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3616,7 +3616,7 @@ def rolling_std(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3702,7 +3702,7 @@ def rolling_var(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -3791,7 +3791,7 @@ def rolling_median(
#
# @note
# If you want to compute multiple aggregation statistics over the same dynamic
-# window, consider using `groupby_rolling` this method can cache the window size
+# window, consider using `group_by_rolling` this method can cache the window size
# computation.
#
# @return [Expr]
@@ -4949,7 +4949,7 @@ def entropy(base: 2, normalize: true)
# Number of valid values there should be in the window before the expression
# is evaluated. valid values = `length - null_count`
# @param parallel [Boolean]
-# Run in parallel. Don't do this in a groupby or another operation that
+# Run in parallel. Don't do this in a group by or another operation that
# already has much parallelization.
#
# @return [Expr]
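The parameters shown here (a validity count plus a `parallel` flag) match a cumulative evaluation method along the lines of `Expr#cumulative_eval`; that attribution is an assumption, since the method name falls outside the visible hunk:

    require "polars-df"

    # assumption: cumulative_eval runs the expression over growing windows;
    # parallel stays false inside a group by, per the note above
    df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
    df.select(
      Polars.col("values").cumulative_eval(Polars.element.first - Polars.element.last ** 2, parallel: false)
    )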