diff --git a/src/aces/aggregate.py b/src/aces/aggregate.py index 8dcdd98..589d74e 100644 --- a/src/aces/aggregate.py +++ b/src/aces/aggregate.py @@ -312,79 +312,77 @@ def aggregate_event_bound_window( ... datetime(year=1989, month=12, day=8, hour=16, minute=22), ... datetime(year=1989, month=12, day=10, hour=3, minute=7), # HAS EVENT BOUND ... ], - ... "_EVENT_INDEX": [0, 1, 2, 0, 1, 2, 3, 4], ... "is_A": [1, 0, 1, 1, 1, 1, 0, 0], ... "is_B": [0, 1, 0, 1, 0, 1, 1, 1], ... "is_C": [0, 1, 0, 0, 0, 1, 0, 1], + ... "_EVENT_INDEX": [0, 1, 2, 0, 1, 2, 3, 4], ... }) - >>> aggregate_event_bound_window(df, ToEventWindowBounds(True, "is_C", True, None)).drop( - ... "_EVENT_INDEX") + >>> aggregate_event_bound_window(df, ToEventWindowBounds(True, "is_C", True, None)).drop("timestamp") shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 3 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ - >>> 
aggregate_event_bound_window(df, ToEventWindowBounds(True, "is_C", False, None)).drop( - ... "_EVENT_INDEX") + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 3 ┆ 2 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ + >>> aggregate_event_bound_window(df, ToEventWindowBounds(True, "is_C", False, None)).drop("timestamp") shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 
1989-12-06 15:17:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ - >>> aggregate_event_bound_window(df, ToEventWindowBounds(False, "is_C", True, None)).drop( - ... "_EVENT_INDEX") + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 ┆ 3 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ + >>> # TODO: with st_inclusive=False, a window whose start == end contains no events; confirm whether its _EVENT_INDEX should be null (as shown below) or an index. + >>> aggregate_event_bound_window(df, ToEventWindowBounds(False, "is_C", True, None)).drop("timestamp") shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - 
╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> aggregate_event_bound_window(df, ToEventWindowBounds( - ... 
True, "is_C", True, timedelta(days=3))).drop("_EVENT_INDEX") + ... True, "is_C", True, timedelta(days=3))).drop("timestamp") shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-05 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-05 12:03:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ 
null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ """ if not isinstance(endpoint_expr, ToEventWindowBounds): endpoint_expr = ToEventWindowBounds(*endpoint_expr) @@ -510,180 +508,180 @@ def boolean_expr_bound_sum( ... datetime(year=1989, month=12, day=8, hour=16, minute=22), ... datetime(year=1989, month=12, day=10, hour=3, minute=7), # HAS EVENT BOUND ... ], - ... "_EVENT_INDEX": [0, 1, 2, 0, 1, 2, 3, 4], ... "idx": [0, 1, 2, 3, 4, 5, 6, 7], ... "is_A": [1, 0, 1, 1, 1, 1, 0, 0], ... "is_B": [0, 1, 0, 1, 0, 1, 1, 1], ... "is_C": [0, 1, 0, 0, 0, 1, 0, 1], + ... "_EVENT_INDEX": [0, 1, 2, 0, 1, 2, 3, 4], ... }) >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "both", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 2 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + 
┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 2 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "none", - ... ).drop("idx") + ... 
).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + 
└────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "left", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 
null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 ┆ 3 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "right", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + 
╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-05 15:17:00 ┆ 1 ┆ 0 ┆ 0 ┆ 2 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-06 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-08 16:22:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "both", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + 
┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 ┆ 4 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "none", - ... ).drop("idx") + ... 
).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 ┆ 3 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 
null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "left", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 2 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 
1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 2 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 ┆ 3 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "right", - ... ).drop("idx") + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ 
datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> #### WITH OFFSET #### >>> boolean_expr_bound_sum( ... df, @@ -691,176 +689,176 @@ def boolean_expr_bound_sum( ... "bound_to_row", ... "both", ... offset = timedelta(days=3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-04 12:03:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-06 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-08 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 12:03:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-09 15:17:00 ┆ 2 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 
03:07:00 ┆ 1989-12-11 16:22:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-13 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-04 12:03:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-06 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-08 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 12:03:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-09 15:17:00 ┆ 2 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-11 16:22:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-13 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "left", ... offset = timedelta(days=3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... 
).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-04 12:03:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-06 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 1989-12-08 15:17:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 12:03:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 2 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-09 15:17:00 ┆ 2 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-11 16:22:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 1989-12-13 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-04 12:03:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-06 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-12-08 15:17:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 12:03:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 2 ┆ 1 ┆ 1 ┆ 2 │ + │ 2 ┆ 
1989-12-04 13:14:00 ┆ 1989-12-09 15:17:00 ┆ 2 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-11 16:22:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-13 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "none", ... timedelta(days=-3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 16:22:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 03:07:00 ┆ 1 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 
┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 16:22:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 03:07:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "bound_to_row", ... "right", ... offset = timedelta(days=-3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 16:22:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 03:07:00 ┆ 1 ┆ 1 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + 
╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-05 16:22:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 03:07:00 ┆ 1 ┆ 1 ┆ 1 ┆ 2 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "both", ... offset = timedelta(days=3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-05 12:03:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ 
timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-05 12:03:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 ┆ 4 │ + │ 2 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + │ 2 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 1 ┆ 4 │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "left", ... offset = timedelta(days=3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-12-05 12:03:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 │ - 
└────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 1 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-05 12:03:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-07 13:14:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 ┆ 3 │ + │ 2 ┆ 1989-12-09 15:17:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ null ┆ null ┆ 0 ┆ 0 ┆ 0 ┆ null │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "none", ... offset = timedelta(days=-3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... 
).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-11-28 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-11-30 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-02 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-11-29 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-01 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-03 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-05 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-07 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-11-28 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1989-11-30 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1989-12-02 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-11-29 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │ + │ 2 ┆ 1989-12-01 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │ + │ 2 ┆ 
1989-12-03 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 0 ┆ 0 ┆ 0 ┆ null │ + │ 2 ┆ 1989-12-05 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 2 ┆ 1 ┆ 3 │ + │ 2 ┆ 1989-12-07 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 1 ┆ 0 ┆ 3 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ >>> boolean_expr_bound_sum( ... df, ... pl.col("idx").is_in([1, 4, 7]), ... "row_to_bound", ... "right", ... offset = timedelta(days=-3), - ... ).drop(["idx", "_EVENT_INDEX"]) + ... ).drop(["idx", "timestamp"]) shape: (8, 7) - ┌────────────┬─────────────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┐ - │ subject_id ┆ timestamp ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ datetime[μs] ┆ datetime[μs] ┆ datetime[μs] ┆ i64 ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╡ - │ 1 ┆ 1989-12-01 12:03:00 ┆ 1989-11-28 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-03 13:14:00 ┆ 1989-11-30 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 │ - │ 1 ┆ 1989-12-05 15:17:00 ┆ 1989-12-02 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 │ - │ 2 ┆ 1989-12-02 12:03:00 ┆ 1989-11-29 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-04 13:14:00 ┆ 1989-12-01 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 │ - │ 2 ┆ 1989-12-06 15:17:00 ┆ 1989-12-03 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 │ - │ 2 ┆ 1989-12-08 16:22:00 ┆ 1989-12-05 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 │ - │ 2 ┆ 1989-12-10 03:07:00 ┆ 1989-12-07 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 │ - └────────────┴─────────────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┘ + ┌────────────┬─────────────────────┬─────────────────────┬──────┬──────┬──────┬──────────────┐ + │ subject_id ┆ timestamp_at_start ┆ timestamp_at_end ┆ is_A ┆ is_B ┆ is_C ┆ _EVENT_INDEX │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ datetime[μs] ┆ 
datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════════╪═════════════════════╪══════╪══════╪══════╪══════════════╡ + │ 1 ┆ 1989-11-28 12:03:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-11-30 13:14:00 ┆ 1989-12-03 13:14:00 ┆ 1 ┆ 1 ┆ 1 ┆ 1 │ + │ 1 ┆ 1989-12-02 15:17:00 ┆ 1989-12-03 13:14:00 ┆ 0 ┆ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 1989-11-29 12:03:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-01 13:14:00 ┆ 1989-12-04 13:14:00 ┆ 2 ┆ 1 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-03 15:17:00 ┆ 1989-12-04 13:14:00 ┆ 1 ┆ 0 ┆ 0 ┆ 1 │ + │ 2 ┆ 1989-12-05 16:22:00 ┆ 1989-12-10 03:07:00 ┆ 1 ┆ 3 ┆ 2 ┆ 4 │ + │ 2 ┆ 1989-12-07 03:07:00 ┆ 1989-12-10 03:07:00 ┆ 0 ┆ 2 ┆ 1 ┆ 4 │ + └────────────┴─────────────────────┴─────────────────────┴──────┴──────┴──────┴──────────────┘ """ if mode not in ("bound_to_row", "row_to_bound"): raise ValueError(f"Mode '{mode}' invalid!") @@ -981,6 +979,7 @@ def boolean_expr_bound_sum( pl.col("timestamp_at_boundary").fill_null(strategy=fill_strategy).over("subject_id"), *sum_exprs.values(), "is_real", + EVENT_INDEX_COLUMN, ) .filter("is_real") .drop("is_real") @@ -1004,6 +1003,7 @@ def boolean_expr_bound_sum( st_timestamp_expr.alias("timestamp_at_start"), end_timestamp_expr.alias("timestamp_at_end"), *(pl.col(c).cast(PRED_CNT_TYPE).fill_null(0).alias(c) for c in cols), + EVENT_INDEX_COLUMN, ) if mode == "bound_to_row" and offset > timedelta(0): diff --git a/src/aces/extract_subtree.py b/src/aces/extract_subtree.py index bfb3592..1a5217c 100644 --- a/src/aces/extract_subtree.py +++ b/src/aces/extract_subtree.py @@ -8,7 +8,7 @@ from .aggregate import aggregate_event_bound_window, aggregate_temporal_window from .constraints import check_constraints -from .types import EVENT_INDEX_COLUMN, LAST_EVENT_INDEX_COLUMN +from .types import EVENT_INDEX_COLUMN def extract_subtree( @@ -144,7 +144,7 @@ def extract_subtree( >>> subtreee_anchor_realizations = ( ... predicates_df.filter(pl.col("is_admission") > 0) ... 
.rename({"timestamp": "subtree_anchor_timestamp"}) - ... ).select("subject_id", "subtree_anchor_timestamp") + ... ).select("subject_id", "subtree_anchor_timestamp", "_EVENT_INDEX") >>> print(subtreee_anchor_realizations) shape: (5, 3) ┌────────────┬──────────────────────────┬──────────────┐ @@ -330,7 +330,7 @@ def extract_subtree( pl.lit(child.name).alias("window_name"), "timestamp_at_start", "timestamp_at_end", - pl.col(EVENT_INDEX_COLUMN).alias(LAST_EVENT_INDEX_COLUMN), + pl.col(EVENT_INDEX_COLUMN), *predicate_cols, ).alias(f"{child.name}_summary"), )