Skip to content

Commit

Permalink
fix: fix match on last item for join_asof with strategy="nearest"
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller authored Oct 12, 2023
1 parent 86aafa8 commit da08773
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 8 deletions.
10 changes: 7 additions & 3 deletions crates/polars-ops/src/frame/join/asof/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ pub(super) fn join_asof_nearest_with_tolerance<
}

// We made it to the window: matches are now possible, start measuring distance.
found_window = true;
let current_dist = if val_l > val_r {
val_l - val_r
} else {
Expand All @@ -259,10 +258,15 @@ pub(super) fn join_asof_nearest_with_tolerance<
break;
}
} else {
// We've moved farther away, so the last element was the match.
out.push(Some(offset - 1));
// We've moved farther away, so the last element was the match, if it's within tolerance.
if found_window {
out.push(Some(offset - 1));
} else {
out.push(None);
}
break;
}
found_window = true;
offset += 1;
}
}
Expand Down
10 changes: 7 additions & 3 deletions crates/polars-ops/src/frame/join/asof/groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ pub(super) unsafe fn join_asof_nearest_with_indirection_and_tolerance<
}

// We made it to the window: matches are now possible, start measuring distance.
found_window = true;
let current_dist = if val_l > val_r {
val_l - val_r
} else {
Expand All @@ -141,9 +140,14 @@ pub(super) unsafe fn join_asof_nearest_with_indirection_and_tolerance<
}
prev_offset = offset;
} else {
// We've moved farther away, so the last element was the match.
return (Some(prev_offset), idx - 1);
// We've moved farther away, so the last element was the match, if it's within tolerance.
if found_window {
return (Some(prev_offset), idx - 1);
} else {
return (None, n_right - 1);
}
}
found_window = true;
}

// This should be unreachable.
Expand Down
87 changes: 85 additions & 2 deletions py-polars/tests/unit/operations/test_join_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,14 +455,12 @@ def test_asof_join_nearest() -> None:
"a": [1, 2, 3, 4, 5],
}
).set_sorted("asof_key")

df2 = pl.DataFrame(
{
"asof_key": [1, 2, 3, 10],
"b": [1, 2, 3, 4],
}
).set_sorted("asof_key")

expected = pl.DataFrame(
{
"asof_key": [9, 9, 10, 10, 10],
Expand Down Expand Up @@ -597,6 +595,32 @@ def test_asof_join_nearest_with_tolerance() -> None:
)
assert_frame_equal(out, expected)

# Case #9: last item is closest match
df1 = pl.DataFrame(
{
"asof_key_left": [10.00001, 20.0, 30.0],
}
).set_sorted("asof_key_left")
df2 = pl.DataFrame(
{
"asof_key_right": [10.00001, 20.0001, 29.0],
}
).set_sorted("asof_key_right")
out = df1.join_asof(
df2,
left_on="asof_key_left",
right_on="asof_key_right",
strategy="nearest",
tolerance=0.5,
)
expected = pl.DataFrame(
{
"asof_key_left": [10.00001, 20.0, 30.0],
"asof_key_right": [10.00001, 20.0001, None],
}
)
assert_frame_equal(out, expected)


def test_asof_join_nearest_by() -> None:
# Generic join_asof
Expand Down Expand Up @@ -679,6 +703,35 @@ def test_asof_join_nearest_by() -> None:
out = a.join_asof(b, by="code", on="time", strategy="nearest")
assert_frame_equal(out, expected)

# last item is closest match
df1 = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_left": [10.00001, 20.0, 30.0],
}
).set_sorted("asof_key_left")
df2 = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_right": [10.00001, 20.0001, 29.0],
}
).set_sorted("asof_key_right")
out = df1.join_asof(
df2,
left_on="asof_key_left",
right_on="asof_key_right",
by="a",
strategy="nearest",
)
expected = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_left": [10.00001, 20.0, 30.0],
"asof_key_right": [10.00001, 20.0001, 29.0],
}
)
assert_frame_equal(out, expected)


def test_asof_join_nearest_by_with_tolerance() -> None:
df1 = pl.DataFrame(
Expand Down Expand Up @@ -934,6 +987,36 @@ def test_asof_join_nearest_by_with_tolerance() -> None:
).sort(by=["group", "a"])
assert_frame_equal(out, expected)

# last item is closest match
df1 = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_left": [10.00001, 20.0, 30.0],
}
).set_sorted("asof_key_left")
df2 = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_right": [10.00001, 20.0001, 29.0],
}
).set_sorted("asof_key_right")
out = df1.join_asof(
df2,
left_on="asof_key_left",
right_on="asof_key_right",
by="a",
strategy="nearest",
tolerance=0.5,
)
expected = pl.DataFrame(
{
"a": [1, 1, 1],
"asof_key_left": [10.00001, 20.0, 30.0],
"asof_key_right": [10.00001, 20.0001, None],
}
)
assert_frame_equal(out, expected)


def test_asof_join_nearest_by_date() -> None:
df1 = pl.DataFrame(
Expand Down

0 comments on commit da08773

Please sign in to comment.