Skip to content

Commit

Permalink
Update schema property references
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Jun 13, 2024
1 parent 94c8c68 commit 76894ed
Show file tree
Hide file tree
Showing 34 changed files with 80 additions and 65 deletions.
2 changes: 1 addition & 1 deletion py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,7 +1708,7 @@ def test_schema_equality() -> None:
lf = pl.LazyFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
lf_rev = lf.select("bar", "foo")

assert lf.schema != lf_rev.schema
assert lf.collect_schema() != lf_rev.collect_schema()
assert lf.collect().schema != lf_rev.collect().schema


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def test_struct_null_count_10130() -> None:
def test_struct_arithmetic_schema() -> None:
q = pl.LazyFrame({"A": [1], "B": [2]})

assert q.select(pl.struct("A") - pl.struct("B")).schema["A"] == pl.Struct(
assert q.select(pl.struct("A") - pl.struct("B")).collect_schema()["A"] == pl.Struct(
{"A": pl.Int64}
)

Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1225,7 +1225,7 @@ def test_convert_time_zone_lazy_schema() -> None:
result = ldf.with_columns(
pl.col("ts_us").dt.convert_time_zone("America/New_York").alias("ts_us_ny"),
pl.col("ts_ms").dt.convert_time_zone("America/New_York").alias("ts_us_kt"),
).schema
).collect_schema()
expected = {
"ts_us": pl.Datetime("us", "UTC"),
"ts_ms": pl.Datetime("ms", "UTC"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_duration_time_units(time_unit: TimeUnit, expected: timedelta) -> None:
time_unit=time_unit,
)
)
assert result.schema["duration"] == pl.Duration(time_unit)
assert result.collect_schema()["duration"] == pl.Duration(time_unit)
assert result.collect()["duration"].item() == expected
if time_unit == "ns":
assert (
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/functions/range/test_datetime_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def test_datetime_ranges_schema(
pl.Datetime(time_unit=output_time_unit, time_zone=output_time_zone)
),
}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema

expected = pl.DataFrame(
Expand Down Expand Up @@ -442,7 +442,7 @@ def test_datetime_range_schema_upcasts_to_datetime(
"end": pl.Date,
"datetime_range": pl.List(output_dtype),
}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema

expected = pl.DataFrame(
Expand Down Expand Up @@ -485,7 +485,7 @@ def test_datetime_ranges_no_alias_schema_9037() -> None:
"start": pl.List(pl.Datetime(time_unit="us", time_zone=None)),
"end": pl.Datetime(time_unit="us", time_zone=None),
}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/functions/range/test_int_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_int_range_schema() -> None:
result = pl.LazyFrame().select(int=pl.int_range(-3, 3))

expected_schema = {"int": pl.Int64}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand Down Expand Up @@ -139,7 +139,7 @@ def test_int_ranges_schema_dtype_default() -> None:
result = lf.select(pl.int_ranges("start", "end"))

expected_schema = {"start": pl.List(pl.Int64)}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand All @@ -149,7 +149,7 @@ def test_int_ranges_schema_dtype_arg() -> None:
result = lf.select(pl.int_ranges("start", "end", dtype=pl.UInt16))

expected_schema = {"start": pl.List(pl.UInt16)}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/functions/range/test_time_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_time_range_schema() -> None:
df = pl.DataFrame({"start": [time(1)], "end": [time(1, 30)]}).lazy()
result = df.with_columns(time_range=pl.time_ranges(pl.col("start"), pl.col("end")))
expected_schema = {"start": pl.Time, "end": pl.Time, "time_range": pl.List(pl.Time)}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/functions/test_business_day_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_business_day_count_schema() -> None:
result = lf.select(
business_day_count=pl.business_day_count("start", "end"),
)
assert result.schema["business_day_count"] == pl.Int32
assert result.collect_schema()["business_day_count"] == pl.Int32
assert result.collect().schema["business_day_count"] == pl.Int32
assert 'col("start").business_day_count([col("end")])' in result.explain()

Expand Down
6 changes: 4 additions & 2 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,9 @@ def dataset_path(tmp_path: Path) -> Path:
@pytest.mark.write_disk()
def test_scan_parquet_hive_schema(dataset_path: Path) -> None:
result = pl.scan_parquet(dataset_path / "**/*.parquet", hive_partitioning=True)
assert result.schema == OrderedDict({"a": pl.Int64, "b": pl.Float64, "c": pl.Int64})
assert result.collect_schema() == OrderedDict(
{"a": pl.Int64, "b": pl.Float64, "c": pl.Int64}
)

result = pl.scan_parquet(
dataset_path / "**/*.parquet",
Expand All @@ -248,7 +250,7 @@ def test_scan_parquet_hive_schema(dataset_path: Path) -> None:
)

expected_schema = OrderedDict({"a": pl.Int64, "b": pl.Float64, "c": pl.Int32})
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema
assert result.collect().schema == expected_schema


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_iceberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class TestIcebergScanIO:
def test_scan_iceberg_plain(self, iceberg_path: str) -> None:
df = pl.scan_iceberg(iceberg_path)
assert len(df.collect()) == 3
assert df.schema == {
assert df.collect_schema() == {
"id": pl.Int32,
"str": pl.String,
"ts": pl.Datetime(time_unit="us", time_zone=None),
Expand Down
25 changes: 15 additions & 10 deletions py-polars/tests/unit/lazyframe/test_lazyframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ def test_cast_frame() -> None:
# cast via col:dtype map
assert lf.cast(
dtypes={"b": pl.Float32, "c": pl.String, "d": pl.Datetime("ms")}
).schema == {
).collect_schema() == {
"a": pl.Float64,
"b": pl.Float32,
"c": pl.String,
Expand All @@ -591,7 +591,12 @@ def test_cast_frame() -> None:
cs.temporal(): pl.String,
}
)
assert lfc.schema == {"a": pl.UInt8, "b": pl.Int32, "c": pl.Boolean, "d": pl.String}
assert lfc.collect_schema() == {
"a": pl.UInt8,
"b": pl.Int32,
"c": pl.Boolean,
"d": pl.String,
}
assert lfc.collect().rows() == [
(1, 4, True, "2020-01-02"),
(2, 5, False, "2021-03-04"),
Expand Down Expand Up @@ -1245,22 +1250,22 @@ def test_cum_agg_types() -> None:
pl.col("b").cum_sum(),
pl.col("c").cum_sum(),
)
assert cum_sum_lf.schema["a"] == pl.Int64
assert cum_sum_lf.schema["b"] == pl.UInt32
assert cum_sum_lf.schema["c"] == pl.Float64
assert cum_sum_lf.collect_schema()["a"] == pl.Int64
assert cum_sum_lf.collect_schema()["b"] == pl.UInt32
assert cum_sum_lf.collect_schema()["c"] == pl.Float64
collected_cumsum_lf = cum_sum_lf.collect()
assert collected_cumsum_lf.schema == cum_sum_lf.schema
assert collected_cumsum_lf.schema == cum_sum_lf.collect_schema()

cum_prod_lf = ldf.select(
pl.col("a").cast(pl.UInt64).cum_prod(),
pl.col("b").cum_prod(),
pl.col("c").cum_prod(),
)
assert cum_prod_lf.schema["a"] == pl.UInt64
assert cum_prod_lf.schema["b"] == pl.Int64
assert cum_prod_lf.schema["c"] == pl.Float64
assert cum_prod_lf.collect_schema()["a"] == pl.UInt64
assert cum_prod_lf.collect_schema()["b"] == pl.Int64
assert cum_prod_lf.collect_schema()["c"] == pl.Float64
collected_cum_prod_lf = cum_prod_lf.collect()
assert collected_cum_prod_lf.schema == cum_prod_lf.schema
assert collected_cum_prod_lf.schema == cum_prod_lf.collect_schema()


def test_compare_schema_between_lazy_and_eager_6904() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,12 +434,12 @@ def test_schema_mean_horizontal_single_column(
pl.mean_horizontal(pl.all())
)

assert lf.schema == OrderedDict([("a", out_dtype)])
assert lf.collect_schema() == OrderedDict([("a", out_dtype)])


def test_schema_boolean_sum_horizontal() -> None:
lf = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))
assert lf.schema == OrderedDict([("a", pl.UInt32)])
assert lf.collect_schema() == OrderedDict([("a", pl.UInt32)])


def test_fold_all_schema() -> None:
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ def test_literal_subtract_schema_13284() -> None:
.with_columns(pl.col("a") - pl.lit(1))
.group_by("a")
.len()
).schema == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
).collect_schema() == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])


def test_int_operator_stability() -> None:
Expand All @@ -630,5 +630,5 @@ def test_duration_division_schema() -> None:
.select(pl.col("a") / pl.col("a"))
)

assert q.schema == {"a": pl.Float64}
assert q.collect_schema() == {"a": pl.Float64}
assert q.collect().to_dict(as_series=False) == {"a": [1.0]}
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/map/test_map_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_map_groups_lazy() -> None:

expected = pl.LazyFrame({"a": [6.0, 2.0, 2.0], "b": [6.0, 2.0, 4.0]})
assert_frame_equal(result, expected, check_row_order=False)
assert result.schema == expected.schema
assert result.collect_schema() == expected.collect_schema()


def test_map_groups_rolling() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_array_min_max_dtype_12123() -> None:
min=pl.col("a").arr.min().alias("min"),
)

assert df.schema == {
assert df.collect_schema() == {
"a": pl.Array(pl.Float64, 2),
"b": pl.Float64,
"max": pl.Float64,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,7 +900,7 @@ def test_list_eval_err_raise_15653() -> None:

def test_list_sum_bool_schema() -> None:
q = pl.LazyFrame({"x": [[True, True, False]]})
assert q.select(pl.col("x").list.sum()).schema["x"] == pl.UInt32
assert q.select(pl.col("x").list.sum()).collect_schema()["x"] == pl.UInt32


def test_list_eval_type_cast_11188() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_hex_decode_return_dtype() -> None:
assert df.schema == {"a": pl.Binary}

ldf = pl.LazyFrame(data).select(expr)
assert ldf.schema == {"a": pl.Binary}
assert ldf.collect_schema() == {"a": pl.Binary}


def test_base64_decode_return_dtype() -> None:
Expand All @@ -348,7 +348,7 @@ def test_base64_decode_return_dtype() -> None:
assert df.schema == {"a": pl.Binary}

ldf = pl.LazyFrame(data).select(expr)
assert ldf.schema == {"a": pl.Binary}
assert ldf.collect_schema() == {"a": pl.Binary}


def test_str_replace_str_replace_all() -> None:
Expand Down Expand Up @@ -628,7 +628,7 @@ def test_json_decode_lazy_expr() -> None:
expected = pl.DataFrame(
{"json": [{"a": 1, "b": True}, None, {"a": 2, "b": False}]}
).lazy()
assert ldf.schema == {"json": dtype}
assert ldf.collect_schema() == {"json": dtype}
assert_frame_equal(ldf, expected)


Expand Down Expand Up @@ -1444,7 +1444,7 @@ def test_string_extract_groups_lazy_schema_10305() -> None:
"captures"
)

assert df.schema == {"candidate": pl.String, "ref": pl.String}
assert df.collect_schema() == {"candidate": pl.String, "ref": pl.String}


def test_string_reverse() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_add_business_days_schema() -> None:
result = lf.select(
result=pl.col("start").dt.add_business_days("n"),
)
assert result.schema["result"] == pl.Date
assert result.collect_schema()["result"] == pl.Date
assert result.collect().schema["result"] == pl.Date
assert 'col("start").add_business_days([col("n")])' in result.explain()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,9 @@ def test_base_utc_offset_lazy_schema() -> None:
eager=True,
)
df = pl.DataFrame({"ts": ser}).lazy()
result = df.with_columns(base_utc_offset=pl.col("ts").dt.base_utc_offset()).schema
result = df.with_columns(
base_utc_offset=pl.col("ts").dt.base_utc_offset()
).collect_schema()
expected = {
"ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
"base_utc_offset": pl.Duration(time_unit="ms"),
Expand Down Expand Up @@ -382,7 +384,7 @@ def test_dst_offset_lazy_schema() -> None:
eager=True,
)
df = pl.DataFrame({"ts": ser}).lazy()
result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).schema
result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).collect_schema()
expected = {
"ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
"dst_offset": pl.Duration(time_unit="ms"),
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/test_bitwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
def test_bitwise_integral_schema(op: str) -> None:
df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
assert q.schema["a"] == df.schema["a"]
assert q.collect_schema()["a"] == df.collect_schema()["a"]
4 changes: 2 additions & 2 deletions py-polars/tests/unit/operations/test_clear.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def test_clear_lf() -> None:
}
)
ldfe = lf.clear()
assert ldfe.schema == lf.schema
assert ldfe.collect_schema() == lf.collect_schema()

ldfe = lf.clear(2)
assert ldfe.schema == lf.schema
assert ldfe.collect_schema() == lf.collect_schema()
assert ldfe.collect().rows() == [(None, None, None), (None, None, None)]


Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/operations/test_ewm_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_ewma_by_date(sort: bool) -> None:
{"values": [None, 1.0, 1.9116116523516815, None, 3.815410804703363]}
)
assert_frame_equal(result.collect(), expected)
assert result.schema["values"] == pl.Float64
assert result.collect_schema()["values"] == pl.Float64
assert result.collect().schema["values"] == pl.Float64


Expand Down Expand Up @@ -88,7 +88,7 @@ def test_ewma_f32() -> None:
schema_overrides={"values": pl.Float32},
)
assert_frame_equal(result.collect(), expected)
assert result.schema["values"] == pl.Float32
assert result.collect_schema()["values"] == pl.Float32
assert result.collect().schema["values"] == pl.Float32


Expand Down Expand Up @@ -170,7 +170,7 @@ def test_ewma_by_index(data_type: PolarsIntegerType) -> None:
{"values": [None, 1.0, 1.9116116523516815, None, 3.815410804703363]}
)
assert_frame_equal(result.collect(), expected)
assert result.schema["values"] == pl.Float64
assert result.collect_schema()["values"] == pl.Float64
assert result.collect().schema["values"] == pl.Float64


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/test_fill_null.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_fill_null_static_schema_4843() -> None:

df2 = df1.select([pl.col(pl.Int64).fill_null(0)])
df3 = df2.select(pl.col(pl.Int64))
assert df3.schema == {"a": pl.Int64, "b": pl.Int64}
assert df3.collect_schema() == {"a": pl.Int64, "b": pl.Int64}


def test_fill_null_f32_with_lit() -> None:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/test_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_gather_agg_schema() -> None:
df.lazy()
.group_by("group", maintain_order=True)
.agg(pl.col("value").get(1))
.schema["value"]
.collect_schema()["value"]
== pl.Int64
)

Expand Down
6 changes: 4 additions & 2 deletions py-polars/tests/unit/operations/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,13 +1022,15 @@ def test_schema_on_agg() -> None:
"first": pl.Int64,
"last": pl.Int64,
}
assert result.schema == expected_schema
assert result.collect_schema() == expected_schema


def test_group_by_schema_err() -> None:
lf = pl.LazyFrame({"foo": [None, 1, 2], "bar": [1, 2, 3]})
with pytest.raises(pl.ColumnNotFoundError):
lf.group_by("not-existent").agg(pl.col("bar").max().alias("max_bar")).schema
lf.group_by("not-existent").agg(
pl.col("bar").max().alias("max_bar")
).collect_schema()


@pytest.mark.parametrize(
Expand Down
Loading

0 comments on commit 76894ed

Please sign in to comment.