Skip to content

Commit

Permalink
feat: always preserve sorted flag for .dt.date
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Sep 11, 2024
1 parent d8acacf commit 14b18e0
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 42 deletions.
51 changes: 21 additions & 30 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,20 +247,20 @@ pub(super) fn time(s: &Series) -> PolarsResult<Series> {
pub(super) fn date(s: &Series) -> PolarsResult<Series> {
match s.dtype() {
#[cfg(feature = "timezones")]
DataType::Datetime(_, Some(tz)) => {
let mut out = {
polars_ops::chunked_array::replace_time_zone(
s.datetime().unwrap(),
None,
&StringChunked::from_iter(std::iter::once("raise")),
NonExistent::Raise,
)?
.cast(&DataType::Date)?
};
if tz != "UTC" {
// DST transitions may not preserve sortedness.
out.set_sorted_flag(IsSorted::Not);
}
DataType::Datetime(_, Some(_)) => {
let mut out = polars_ops::chunked_array::replace_time_zone(
s.datetime().unwrap(),
None,
&StringChunked::from_iter(std::iter::once("raise")),
NonExistent::Raise,
)?
.cast(&DataType::Date)?;
// `replace_time_zone` may unset the sorted flag. However, we only keep the date
// part of the result, so we can safely preserve the input's sorted flag here. We
// may need to make an exception if a time zone introduces a change which involves
// "going back in time" and repeating a day, but we're not aware of that ever
// having happened.
out.set_sorted_flag(s.is_sorted_flag());
Ok(out)
},
DataType::Datetime(_, _) => s.datetime().unwrap().cast(&DataType::Date),
Expand All @@ -271,22 +271,13 @@ pub(super) fn date(s: &Series) -> PolarsResult<Series> {
pub(super) fn datetime(s: &Series) -> PolarsResult<Series> {
match s.dtype() {
#[cfg(feature = "timezones")]
DataType::Datetime(tu, Some(tz)) => {
let mut out = {
polars_ops::chunked_array::replace_time_zone(
s.datetime().unwrap(),
None,
&StringChunked::from_iter(std::iter::once("raise")),
NonExistent::Raise,
)?
.cast(&DataType::Datetime(*tu, None))?
};
if tz != "UTC" {
// DST transitions may not preserve sortedness.
out.set_sorted_flag(IsSorted::Not);
}
Ok(out)
},
DataType::Datetime(tu, Some(_)) => polars_ops::chunked_array::replace_time_zone(
s.datetime().unwrap(),
None,
&StringChunked::from_iter(std::iter::once("raise")),
NonExistent::Raise,
)?
.cast(&DataType::Datetime(*tu, None)),
DataType::Datetime(tu, _) => s.datetime().unwrap().cast(&DataType::Datetime(*tu, None)),
dtype => polars_bail!(ComputeError: "expected Datetime, got {}", dtype),
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,26 +125,19 @@ def test_dt_datetime_deprecated() -> None:
assert result.item() == expected


@pytest.mark.parametrize(
("time_zone", "expected"),
[
(None, True),
("Asia/Kathmandu", False),
("UTC", True),
],
)
def test_local_date_sortedness(time_zone: str | None, expected: bool) -> None:
# singleton - always sorted
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_date_sortedness(time_zone: str | None) -> None:
# singleton
ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
result = ser.dt.date()
assert result.flags["SORTED_ASC"]

# 2 elements - depends on time zone
# 2 elements
ser = (
pl.Series([datetime(2022, 1, 1, 23)] * 2).dt.replace_time_zone(time_zone)
).sort()
result = ser.dt.date()
assert result.flags["SORTED_ASC"] >= expected
assert result.flags["SORTED_ASC"]


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
Expand Down

0 comments on commit 14b18e0

Please sign in to comment.