depr(python): Deprecate DataFrame.to_dict positional input (#12131)
stinodego authored Oct 31, 2023
1 parent b4c1bf1 commit 5689ad5
Showing 73 changed files with 810 additions and 647 deletions.
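
For callers the change is mechanical: to_dict still accepts as_series positionally for now, but only the keyword form is future-proof. A minimal before/after sketch of the user-facing effect (the sample frame is illustrative and assumes a polars version that includes this change):

import polars as pl

df = pl.DataFrame({"a": [1, 2], "b": ["x", "y"]})

# Deprecated: positional `as_series` now emits a DeprecationWarning.
d_old = df.to_dict(False)

# Preferred: pass `as_series` by keyword.
d_new = df.to_dict(as_series=False)

assert d_old == d_new == {"a": [1, 2], "b": ["x", "y"]}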
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
@@ -1919,7 +1919,7 @@ def to_dict(
) -> dict[str, Series] | dict[str, list[Any]]:
...

# TODO: Make `as_series` keyword-only
@deprecate_nonkeyword_arguments(version="0.19.13")
def to_dict(
self,
as_series: bool = True, # noqa: FBT001
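
The decorator added above is what produces the warning when as_series is passed positionally. The sketch below is a simplified stand-in for such a decorator, not the actual polars utility (the allowed_args count parameter and the warning message are assumptions for illustration; the real decorator lives in polars' deprecation utilities):

import warnings
from functools import wraps
from typing import Any, Callable, TypeVar

T = TypeVar("T")


def deprecate_nonkeyword_arguments(
    version: str, allowed_args: int = 1
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    # Illustrative stand-in, not the polars implementation.
    # `allowed_args` counts `self`, so the default of 1 means
    # "everything after self should be passed by keyword".
    def decorate(function: Callable[..., T]) -> Callable[..., T]:
        @wraps(function)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            if len(args) > allowed_args:
                warnings.warn(
                    f"Positional arguments to `{function.__name__}` are deprecated "
                    f"since version {version}; pass them by keyword instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
            return function(*args, **kwargs)

        return wrapper

    return decorate

Emitting a DeprecationWarning at call time keeps existing positional calls working for a release cycle, after which the parameter can be made keyword-only in the signature itself.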
107 changes: 67 additions & 40 deletions py-polars/tests/unit/dataframe/test_df.py
@@ -607,9 +607,11 @@ def test_to_dummies() -> None:
assert_frame_equal(result, expected)

# test sorted fast path
assert pl.DataFrame({"x": pl.arange(0, 3, eager=True)}).to_dummies("x").to_dict(
False
) == {"x_0": [1, 0, 0], "x_1": [0, 1, 0], "x_2": [0, 0, 1]}
result = pl.DataFrame({"x": pl.arange(0, 3, eager=True)}).to_dummies("x")
expected = pl.DataFrame(
{"x_0": [1, 0, 0], "x_1": [0, 1, 0], "x_2": [0, 0, 1]}
).with_columns(pl.all().cast(pl.UInt8))
assert_frame_equal(result, expected)


def test_to_dummies_drop_first() -> None:
@@ -911,7 +913,7 @@ def test_cast_frame() -> None:
]

# cast all fields to a single type
assert df.cast(pl.Utf8).to_dict(False) == {
assert df.cast(pl.Utf8).to_dict(as_series=False) == {
"a": ["1.0", "2.5", "3.0"],
"b": ["4", "5", None],
"c": ["true", "false", "true"],
@@ -973,7 +975,7 @@ def test_describe() -> None:
}
)

assert df.describe().to_dict(False) == {
assert df.describe().to_dict(as_series=False) == {
"describe": [
"count",
"null_count",
@@ -1360,7 +1362,7 @@ def __iter__(self) -> Iterator[Any]:
"itms": d.items(),
}
)
assert df.to_dict(False) == {
assert df.to_dict(as_series=False) == {
"keys": [0, 1, 2],
"vals": ["x", "y", "z"],
"itms": [(0, "x"), (1, "y"), (2, "z")],
@@ -1373,7 +1375,7 @@ def __iter__(self) -> Iterator[Any]:
"rev_itms": reversed(d.items()),
}
)
assert df.to_dict(False) == {
assert df.to_dict(as_series=False) == {
"rev_keys": [2, 1, 0],
"rev_vals": ["z", "y", "x"],
"rev_itms": [(2, "z"), (1, "y"), (0, "x")],
@@ -1928,7 +1930,7 @@ def test_rename_swap() -> None:
# Select some columns
ldf = ldf.select(["priority", "weekday", "round_number"])

assert ldf.collect().to_dict(False) == {
assert ldf.collect().to_dict(as_series=False) == {
"priority": [1],
"weekday": [2],
"round_number": [3],
@@ -1944,7 +1946,9 @@ def test_rename_same_name() -> None:
).lazy()
df = df.rename({"groups": "groups"})
df = df.select(["groups"])
assert df.collect().to_dict(False) == {"groups": ["A", "A", "B", "C", "B"]}
assert df.collect().to_dict(as_series=False) == {
"groups": ["A", "A", "B", "C", "B"]
}
df = pl.DataFrame(
{
"nrs": [1, 2, 3, 4, 5],
Expand All @@ -1955,7 +1959,9 @@ def test_rename_same_name() -> None:
df = df.rename({"nrs": "nrs", "groups": "groups"})
df = df.select(["groups"])
df.collect()
assert df.collect().to_dict(False) == {"groups": ["A", "A", "B", "C", "B"]}
assert df.collect().to_dict(as_series=False) == {
"groups": ["A", "A", "B", "C", "B"]
}


def test_fill_null() -> None:
@@ -1986,7 +1992,7 @@ def test_fill_null() -> None:
pl.all().forward_fill().name.suffix("_forward"),
pl.all().backward_fill().name.suffix("_backward"),
]
).to_dict(False) == {
).to_dict(as_series=False) == {
"c_forward": [
["Apple", "Orange"],
["Apple", "Orange"],
@@ -2148,11 +2154,11 @@ def test_df_series_division() -> None:
}
)
s = pl.Series([2, 2, 2, 2, 2, 2])
assert (df / s).to_dict(False) == {
assert (df / s).to_dict(as_series=False) == {
"a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
"b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0],
}
assert (df // s).to_dict(False) == {
assert (df // s).to_dict(as_series=False) == {
"a": [1, 1, 2, 2, 3, 3],
"b": [1, 1, 5, 2, 3, 3],
}
@@ -2503,7 +2509,10 @@ def test_explode_empty() -> None:
.group_by("x", maintain_order=True)
.agg(pl.col("y").take([]))
)
assert df.explode("y").to_dict(False) == {"x": ["a", "b"], "y": [None, None]}
assert df.explode("y").to_dict(as_series=False) == {
"x": ["a", "b"],
"y": [None, None],
}

df = pl.DataFrame({"x": ["1", "2", "4"], "y": [["a", "b", "c"], ["d"], []]})
assert_frame_equal(
@@ -2517,7 +2526,10 @@ def test_explode_empty() -> None:
"numbers": [[]],
}
)
assert df.explode("numbers").to_dict(False) == {"letters": ["a"], "numbers": [None]}
assert df.explode("numbers").to_dict(as_series=False) == {
"letters": ["a"],
"numbers": [None],
}


def test_asof_by_multiple_keys() -> None:
@@ -2560,10 +2572,12 @@ def test_partition_by() -> None:
{"foo": ["C"], "N": [2], "bar": ["l"]},
]
assert [
a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True)
a.to_dict(as_series=False)
for a in df.partition_by("foo", "bar", maintain_order=True)
] == expected
assert [
a.to_dict(False) for a in df.partition_by(cs.string(), maintain_order=True)
a.to_dict(as_series=False)
for a in df.partition_by(cs.string(), maintain_order=True)
] == expected

expected = [
@@ -2573,26 +2587,30 @@ def test_partition_by() -> None:
{"N": [2]},
]
assert [
a.to_dict(False)
a.to_dict(as_series=False)
for a in df.partition_by(["foo", "bar"], maintain_order=True, include_key=False)
] == expected
assert [
a.to_dict(False)
a.to_dict(as_series=False)
for a in df.partition_by("foo", "bar", maintain_order=True, include_key=False)
] == expected

assert [a.to_dict(False) for a in df.partition_by("foo", maintain_order=True)] == [
assert [
a.to_dict(as_series=False) for a in df.partition_by("foo", maintain_order=True)
] == [
{"foo": ["A", "A"], "N": [1, 2], "bar": ["k", "l"]},
{"foo": ["B", "B"], "N": [2, 4], "bar": ["m", "m"]},
{"foo": ["C"], "N": [2], "bar": ["l"]},
]

df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]})
assert df.partition_by(cs.all(), as_dict=True)["one", 1].to_dict(False) == {
assert df.partition_by(cs.all(), as_dict=True)["one", 1].to_dict(
as_series=False
) == {
"a": ["one"],
"b": [1],
}
assert df.partition_by(["a"], as_dict=True)["one"].to_dict(False) == {
assert df.partition_by(["a"], as_dict=True)["one"].to_dict(as_series=False) == {
"a": ["one", "one"],
"b": [1, 3],
}
@@ -2624,9 +2642,9 @@ def test_list_of_list_of_struct() -> None:


def test_concat_to_empty() -> None:
assert pl.concat([pl.DataFrame([]), pl.DataFrame({"a": [1]})]).to_dict(False) == {
"a": [1]
}
assert pl.concat([pl.DataFrame([]), pl.DataFrame({"a": [1]})]).to_dict(
as_series=False
) == {"a": [1]}


def test_fill_null_limits() -> None:
@@ -2641,7 +2659,7 @@ def test_fill_null_limits() -> None:
pl.all().fill_null(strategy="forward", limit=2),
pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"),
]
).to_dict(False) == {
).to_dict(as_series=False) == {
"a": [1, 1, 1, None, 5, 6, 6, 6, None, 10],
"b": ["a", "a", "a", None, "b", "c", "c", "c", None, "d"],
"c": [True, True, True, None, False, True, True, True, None, False],
@@ -2709,33 +2727,42 @@ def test_selection_regex_and_multicol() -> None:
)

# Multi * Single
assert test_df.select(pl.col(["a", "b", "c"]) * pl.col("foo")).to_dict(False) == {
assert test_df.select(pl.col(["a", "b", "c"]) * pl.col("foo")).to_dict(
as_series=False
) == {
"a": [13, 28, 45, 64],
"b": [65, 84, 105, 128],
"c": [117, 140, 165, 192],
}
assert test_df.select(pl.all().exclude("foo") * pl.col("foo")).to_dict(False) == {
assert test_df.select(pl.all().exclude("foo") * pl.col("foo")).to_dict(
as_series=False
) == {
"a": [13, 28, 45, 64],
"b": [65, 84, 105, 128],
"c": [117, 140, 165, 192],
}

assert test_df.select(pl.col("^\\w$") * pl.col("foo")).to_dict(False) == {
assert test_df.select(pl.col("^\\w$") * pl.col("foo")).to_dict(as_series=False) == {
"a": [13, 28, 45, 64],
"b": [65, 84, 105, 128],
"c": [117, 140, 165, 192],
}

# Multi * Multi
assert test_df.select(pl.col(["a", "b", "c"]) * pl.col(["a", "b", "c"])).to_dict(
False
) == {"a": [1, 4, 9, 16], "b": [25, 36, 49, 64], "c": [81, 100, 121, 144]}
assert test_df.select(pl.exclude("foo") * pl.exclude("foo")).to_dict(False) == {
result = test_df.select(pl.col(["a", "b", "c"]) * pl.col(["a", "b", "c"]))
expected = {"a": [1, 4, 9, 16], "b": [25, 36, 49, 64], "c": [81, 100, 121, 144]}
assert result.to_dict(as_series=False) == expected

assert test_df.select(pl.exclude("foo") * pl.exclude("foo")).to_dict(
as_series=False
) == {
"a": [1, 4, 9, 16],
"b": [25, 36, 49, 64],
"c": [81, 100, 121, 144],
}
assert test_df.select(pl.col("^\\w$") * pl.col("^\\w$")).to_dict(False) == {
assert test_df.select(pl.col("^\\w$") * pl.col("^\\w$")).to_dict(
as_series=False
) == {
"a": [1, 4, 9, 16],
"b": [25, 36, 49, 64],
"c": [81, 100, 121, 144],
@@ -2852,7 +2879,7 @@ def test_indexing_set() -> None:
df[0, "nr"] = 100
df[0, "str"] = "foo"

assert df.to_dict(False) == {
assert df.to_dict(as_series=False) == {
"bool": [False, True],
"str": ["foo", "N/A"],
"nr": [100, 2],
@@ -3297,15 +3324,15 @@ def test_deadlocks_3409() -> None:
pl.element().map_elements(lambda x: x, return_dtype=pl.Int64)
)
)
.to_dict(False)
.to_dict(as_series=False)
) == {"col1": [[1, 2, 3]]}

assert (
pl.DataFrame({"col1": [1, 2, 3]})
.with_columns(
[pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))]
)
.to_dict(False)
.to_dict(as_series=False)
) == {"col1": [0, 0, 0]}


@@ -3434,7 +3461,7 @@ def test_unstack() -> None:
"col3": pl.int_range(-9, 0, eager=True),
}
)
assert df.unstack(step=3, how="vertical").to_dict(False) == {
assert df.unstack(step=3, how="vertical").to_dict(as_series=False) == {
"col1_0": ["A", "B", "C"],
"col1_1": ["D", "E", "F"],
"col1_2": ["G", "H", "I"],
@@ -3446,7 +3473,7 @@ def test_unstack() -> None:
"col3_2": [-3, -2, -1],
}

assert df.unstack(step=3, how="horizontal").to_dict(False) == {
assert df.unstack(step=3, how="horizontal").to_dict(as_series=False) == {
"col1_0": ["A", "D", "G"],
"col1_1": ["B", "E", "H"],
"col1_2": ["C", "F", "I"],
@@ -3463,7 +3490,7 @@ def test_unstack() -> None:
step=3,
how="horizontal",
columns=column_subset, # type: ignore[arg-type]
).to_dict(False) == {
).to_dict(as_series=False) == {
"col2_0": [0, 3, 6],
"col2_1": [1, 4, 7],
"col2_2": [2, 5, 8],
8 changes: 4 additions & 4 deletions py-polars/tests/unit/dataframe/test_from_dict.py
@@ -41,7 +41,7 @@ def test_from_dict_with_scalars() -> None:
df1 = pl.DataFrame(
{"key": ["aa", "bb", "cc"], "misc": "xyz", "other": None, "value": 0}
)
assert df1.to_dict(False) == {
assert df1.to_dict(as_series=False) == {
"key": ["aa", "bb", "cc"],
"misc": ["xyz", "xyz", "xyz"],
"other": [None, None, None],
@@ -50,7 +50,7 @@ def test_from_dict_with_scalars() -> None:

# edge-case: all scalars
df2 = pl.DataFrame({"key": "aa", "misc": "xyz", "other": None, "value": 0})
assert df2.to_dict(False) == {
assert df2.to_dict(as_series=False) == {
"key": ["aa"],
"misc": ["xyz"],
"other": [None],
@@ -59,7 +59,7 @@ def test_from_dict_with_scalars() -> None:

# edge-case: single unsized generator
df3 = pl.DataFrame({"vals": map(float, [1, 2, 3])})
assert df3.to_dict(False) == {"vals": [1.0, 2.0, 3.0]}
assert df3.to_dict(as_series=False) == {"vals": [1.0, 2.0, 3.0]}

# ensure we don't accidentally consume or expand map/range/generator
# cols, and can properly apply schema dtype/ordering directives
@@ -78,7 +78,7 @@ def test_from_dict_with_scalars() -> None:
},
)
assert df4.columns == ["value", "other", "misc", "key"]
assert df4.to_dict(False) == {
assert df4.to_dict(as_series=False) == {
"value": ["x", "y", "z"],
"other": [7.0, 8.0, 9.0],
"misc": [4, 5, 6],
7 changes: 5 additions & 2 deletions py-polars/tests/unit/datatypes/test_array.py
@@ -79,7 +79,10 @@ def test_array_in_group_by() -> None:
"g": pl.Int64,
"a": pl.List(pl.Array(inner=pl.Int64, width=2)),
}
assert out.to_dict(False) == {"g": [1, 2], "a": [[[1, 2], [2, 2]], [[1, 4]]]}
assert out.to_dict(as_series=False) == {
"g": [1, 2],
"a": [[[1, 2], [2, 2]], [[1, 4]]],
}


def test_array_invalid_operation() -> None:
@@ -101,7 +104,7 @@ def test_array_concat() -> None:
b_df = pl.DataFrame({"a": [[1, 1], [0, 0]]}).select(
pl.col("a").cast(pl.Array(inner=pl.Int32, width=2))
)
assert pl.concat([a_df, b_df]).to_dict(False) == {
assert pl.concat([a_df, b_df]).to_dict(as_series=False) == {
"a": [[0, 1], [1, 0], [1, 1], [0, 0]]
}

2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_binary.py
@@ -9,7 +9,7 @@ def test_binary_filter() -> None:
"content": [b"aa", b"aaabbb", b"aa", b"\xc6i\xea"],
}
)
assert df.filter(pl.col("content") == b"\xc6i\xea").to_dict(False) == {
assert df.filter(pl.col("content") == b"\xc6i\xea").to_dict(as_series=False) == {
"name": ["d"],
"content": [b"\xc6i\xea"],
}