Update schema property references

pola-rs · Jun 13, 2024 · 76894ed
1 parent 94c8c68
commit 76894ed
Show file tree

Hide file tree

Showing 34 changed files with 80 additions and 65 deletions.
diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -1708,7 +1708,7 @@ def test_schema_equality() -> None:
     lf = pl.LazyFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
     lf_rev = lf.select("bar", "foo")
 
-    assert lf.schema != lf_rev.schema
+    assert lf.collect_schema() != lf_rev.collect_schema()
     assert lf.collect().schema != lf_rev.collect().schema
 
 

diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py
@@ -871,7 +871,7 @@ def test_struct_null_count_10130() -> None:
 def test_struct_arithmetic_schema() -> None:
     q = pl.LazyFrame({"A": [1], "B": [2]})
 
-    assert q.select(pl.struct("A") - pl.struct("B")).schema["A"] == pl.Struct(
+    assert q.select(pl.struct("A") - pl.struct("B")).collect_schema()["A"] == pl.Struct(
         {"A": pl.Int64}
     )
 

diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py
@@ -1225,7 +1225,7 @@ def test_convert_time_zone_lazy_schema() -> None:
     result = ldf.with_columns(
         pl.col("ts_us").dt.convert_time_zone("America/New_York").alias("ts_us_ny"),
         pl.col("ts_ms").dt.convert_time_zone("America/New_York").alias("ts_us_kt"),
-    ).schema
+    ).collect_schema()
     expected = {
         "ts_us": pl.Datetime("us", "UTC"),
         "ts_ms": pl.Datetime("ms", "UTC"),

diff --git a/py-polars/tests/unit/functions/as_datatype/test_duration.py b/py-polars/tests/unit/functions/as_datatype/test_duration.py
@@ -38,7 +38,7 @@ def test_duration_time_units(time_unit: TimeUnit, expected: timedelta) -> None:
             time_unit=time_unit,
         )
     )
-    assert result.schema["duration"] == pl.Duration(time_unit)
+    assert result.collect_schema()["duration"] == pl.Duration(time_unit)
     assert result.collect()["duration"].item() == expected
     if time_unit == "ns":
         assert (

diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py
@@ -338,7 +338,7 @@ def test_datetime_ranges_schema(
             pl.Datetime(time_unit=output_time_unit, time_zone=output_time_zone)
         ),
     }
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
     expected = pl.DataFrame(
@@ -442,7 +442,7 @@ def test_datetime_range_schema_upcasts_to_datetime(
         "end": pl.Date,
         "datetime_range": pl.List(output_dtype),
     }
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
     expected = pl.DataFrame(
@@ -485,7 +485,7 @@ def test_datetime_ranges_no_alias_schema_9037() -> None:
         "start": pl.List(pl.Datetime(time_unit="us", time_zone=None)),
         "end": pl.Datetime(time_unit="us", time_zone=None),
     }
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 

diff --git a/py-polars/tests/unit/functions/range/test_int_range.py b/py-polars/tests/unit/functions/range/test_int_range.py
@@ -73,7 +73,7 @@ def test_int_range_schema() -> None:
     result = pl.LazyFrame().select(int=pl.int_range(-3, 3))
 
     expected_schema = {"int": pl.Int64}
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 
@@ -139,7 +139,7 @@ def test_int_ranges_schema_dtype_default() -> None:
     result = lf.select(pl.int_ranges("start", "end"))
 
     expected_schema = {"start": pl.List(pl.Int64)}
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 
@@ -149,7 +149,7 @@ def test_int_ranges_schema_dtype_arg() -> None:
     result = lf.select(pl.int_ranges("start", "end", dtype=pl.UInt16))
 
     expected_schema = {"start": pl.List(pl.UInt16)}
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 

diff --git a/py-polars/tests/unit/functions/range/test_time_range.py b/py-polars/tests/unit/functions/range/test_time_range.py
@@ -16,7 +16,7 @@ def test_time_range_schema() -> None:
     df = pl.DataFrame({"start": [time(1)], "end": [time(1, 30)]}).lazy()
     result = df.with_columns(time_range=pl.time_ranges(pl.col("start"), pl.col("end")))
     expected_schema = {"start": pl.Time, "end": pl.Time, "time_range": pl.List(pl.Time)}
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 

diff --git a/py-polars/tests/unit/functions/test_business_day_count.py b/py-polars/tests/unit/functions/test_business_day_count.py
@@ -108,7 +108,7 @@ def test_business_day_count_schema() -> None:
     result = lf.select(
         business_day_count=pl.business_day_count("start", "end"),
     )
-    assert result.schema["business_day_count"] == pl.Int32
+    assert result.collect_schema()["business_day_count"] == pl.Int32
     assert result.collect().schema["business_day_count"] == pl.Int32
     assert 'col("start").business_day_count([col("end")])' in result.explain()
 

diff --git a/py-polars/tests/unit/io/test_hive.py b/py-polars/tests/unit/io/test_hive.py
@@ -239,7 +239,9 @@ def dataset_path(tmp_path: Path) -> Path:
 @pytest.mark.write_disk()
 def test_scan_parquet_hive_schema(dataset_path: Path) -> None:
     result = pl.scan_parquet(dataset_path / "**/*.parquet", hive_partitioning=True)
-    assert result.schema == OrderedDict({"a": pl.Int64, "b": pl.Float64, "c": pl.Int64})
+    assert result.collect_schema() == OrderedDict(
+        {"a": pl.Int64, "b": pl.Float64, "c": pl.Int64}
+    )
 
     result = pl.scan_parquet(
         dataset_path / "**/*.parquet",
@@ -248,7 +250,7 @@ def test_scan_parquet_hive_schema(dataset_path: Path) -> None:
     )
 
     expected_schema = OrderedDict({"a": pl.Int64, "b": pl.Float64, "c": pl.Int32})
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
     assert result.collect().schema == expected_schema
 
 

diff --git a/py-polars/tests/unit/io/test_iceberg.py b/py-polars/tests/unit/io/test_iceberg.py
@@ -38,7 +38,7 @@ class TestIcebergScanIO:
     def test_scan_iceberg_plain(self, iceberg_path: str) -> None:
         df = pl.scan_iceberg(iceberg_path)
         assert len(df.collect()) == 3
-        assert df.schema == {
+        assert df.collect_schema() == {
             "id": pl.Int32,
             "str": pl.String,
             "ts": pl.Datetime(time_unit="us", time_zone=None),

diff --git a/py-polars/tests/unit/lazyframe/test_lazyframe.py b/py-polars/tests/unit/lazyframe/test_lazyframe.py
@@ -576,7 +576,7 @@ def test_cast_frame() -> None:
     # cast via col:dtype map
     assert lf.cast(
         dtypes={"b": pl.Float32, "c": pl.String, "d": pl.Datetime("ms")}
-    ).schema == {
+    ).collect_schema() == {
         "a": pl.Float64,
         "b": pl.Float32,
         "c": pl.String,
@@ -591,7 +591,12 @@ def test_cast_frame() -> None:
             cs.temporal(): pl.String,
         }
     )
-    assert lfc.schema == {"a": pl.UInt8, "b": pl.Int32, "c": pl.Boolean, "d": pl.String}
+    assert lfc.collect_schema() == {
+        "a": pl.UInt8,
+        "b": pl.Int32,
+        "c": pl.Boolean,
+        "d": pl.String,
+    }
     assert lfc.collect().rows() == [
         (1, 4, True, "2020-01-02"),
         (2, 5, False, "2021-03-04"),
@@ -1245,22 +1250,22 @@ def test_cum_agg_types() -> None:
         pl.col("b").cum_sum(),
         pl.col("c").cum_sum(),
     )
-    assert cum_sum_lf.schema["a"] == pl.Int64
-    assert cum_sum_lf.schema["b"] == pl.UInt32
-    assert cum_sum_lf.schema["c"] == pl.Float64
+    assert cum_sum_lf.collect_schema()["a"] == pl.Int64
+    assert cum_sum_lf.collect_schema()["b"] == pl.UInt32
+    assert cum_sum_lf.collect_schema()["c"] == pl.Float64
     collected_cumsum_lf = cum_sum_lf.collect()
-    assert collected_cumsum_lf.schema == cum_sum_lf.schema
+    assert collected_cumsum_lf.schema == cum_sum_lf.collect_schema()
 
     cum_prod_lf = ldf.select(
         pl.col("a").cast(pl.UInt64).cum_prod(),
         pl.col("b").cum_prod(),
         pl.col("c").cum_prod(),
     )
-    assert cum_prod_lf.schema["a"] == pl.UInt64
-    assert cum_prod_lf.schema["b"] == pl.Int64
-    assert cum_prod_lf.schema["c"] == pl.Float64
+    assert cum_prod_lf.collect_schema()["a"] == pl.UInt64
+    assert cum_prod_lf.collect_schema()["b"] == pl.Int64
+    assert cum_prod_lf.collect_schema()["c"] == pl.Float64
     collected_cum_prod_lf = cum_prod_lf.collect()
-    assert collected_cum_prod_lf.schema == cum_prod_lf.schema
+    assert collected_cum_prod_lf.schema == cum_prod_lf.collect_schema()
 
 
 def test_compare_schema_between_lazy_and_eager_6904() -> None:

diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
@@ -434,12 +434,12 @@ def test_schema_mean_horizontal_single_column(
         pl.mean_horizontal(pl.all())
     )
 
-    assert lf.schema == OrderedDict([("a", out_dtype)])
+    assert lf.collect_schema() == OrderedDict([("a", out_dtype)])
 
 
 def test_schema_boolean_sum_horizontal() -> None:
     lf = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))
-    assert lf.schema == OrderedDict([("a", pl.UInt32)])
+    assert lf.collect_schema() == OrderedDict([("a", pl.UInt32)])
 
 
 def test_fold_all_schema() -> None:

diff --git a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
@@ -609,7 +609,7 @@ def test_literal_subtract_schema_13284() -> None:
         .with_columns(pl.col("a") - pl.lit(1))
         .group_by("a")
         .len()
-    ).schema == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
+    ).collect_schema() == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
 
 
 def test_int_operator_stability() -> None:
@@ -630,5 +630,5 @@ def test_duration_division_schema() -> None:
         .select(pl.col("a") / pl.col("a"))
     )
 
-    assert q.schema == {"a": pl.Float64}
+    assert q.collect_schema() == {"a": pl.Float64}
     assert q.collect().to_dict(as_series=False) == {"a": [1.0]}
diff --git a/py-polars/tests/unit/operations/map/test_map_groups.py b/py-polars/tests/unit/operations/map/test_map_groups.py
@@ -32,7 +32,7 @@ def test_map_groups_lazy() -> None:
 
     expected = pl.LazyFrame({"a": [6.0, 2.0, 2.0], "b": [6.0, 2.0, 4.0]})
     assert_frame_equal(result, expected, check_row_order=False)
-    assert result.schema == expected.schema
+    assert result.collect_schema() == expected.collect_schema()
 
 
 def test_map_groups_rolling() -> None:

diff --git a/py-polars/tests/unit/operations/namespaces/array/test_array.py b/py-polars/tests/unit/operations/namespaces/array/test_array.py
@@ -33,7 +33,7 @@ def test_array_min_max_dtype_12123() -> None:
         min=pl.col("a").arr.min().alias("min"),
     )
 
-    assert df.schema == {
+    assert df.collect_schema() == {
         "a": pl.Array(pl.Float64, 2),
         "b": pl.Float64,
         "max": pl.Float64,

diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py
@@ -900,7 +900,7 @@ def test_list_eval_err_raise_15653() -> None:
 
 def test_list_sum_bool_schema() -> None:
     q = pl.LazyFrame({"x": [[True, True, False]]})
-    assert q.select(pl.col("x").list.sum()).schema["x"] == pl.UInt32
+    assert q.select(pl.col("x").list.sum()).collect_schema()["x"] == pl.UInt32
 
 
 def test_list_eval_type_cast_11188() -> None:

diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py
@@ -337,7 +337,7 @@ def test_hex_decode_return_dtype() -> None:
     assert df.schema == {"a": pl.Binary}
 
     ldf = pl.LazyFrame(data).select(expr)
-    assert ldf.schema == {"a": pl.Binary}
+    assert ldf.collect_schema() == {"a": pl.Binary}
 
 
 def test_base64_decode_return_dtype() -> None:
@@ -348,7 +348,7 @@ def test_base64_decode_return_dtype() -> None:
     assert df.schema == {"a": pl.Binary}
 
     ldf = pl.LazyFrame(data).select(expr)
-    assert ldf.schema == {"a": pl.Binary}
+    assert ldf.collect_schema() == {"a": pl.Binary}
 
 
 def test_str_replace_str_replace_all() -> None:
@@ -628,7 +628,7 @@ def test_json_decode_lazy_expr() -> None:
     expected = pl.DataFrame(
         {"json": [{"a": 1, "b": True}, None, {"a": 2, "b": False}]}
     ).lazy()
-    assert ldf.schema == {"json": dtype}
+    assert ldf.collect_schema() == {"json": dtype}
     assert_frame_equal(ldf, expected)
 
 
@@ -1444,7 +1444,7 @@ def test_string_extract_groups_lazy_schema_10305() -> None:
         "captures"
     )
 
-    assert df.schema == {"candidate": pl.String, "ref": pl.String}
+    assert df.collect_schema() == {"candidate": pl.String, "ref": pl.String}
 
 
 def test_string_reverse() -> None:

diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py
@@ -106,7 +106,7 @@ def test_add_business_days_schema() -> None:
     result = lf.select(
         result=pl.col("start").dt.add_business_days("n"),
     )
-    assert result.schema["result"] == pl.Date
+    assert result.collect_schema()["result"] == pl.Date
     assert result.collect().schema["result"] == pl.Date
     assert 'col("start").add_business_days([col("n")])' in result.explain()
 

diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
@@ -344,7 +344,9 @@ def test_base_utc_offset_lazy_schema() -> None:
         eager=True,
     )
     df = pl.DataFrame({"ts": ser}).lazy()
-    result = df.with_columns(base_utc_offset=pl.col("ts").dt.base_utc_offset()).schema
+    result = df.with_columns(
+        base_utc_offset=pl.col("ts").dt.base_utc_offset()
+    ).collect_schema()
     expected = {
         "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
         "base_utc_offset": pl.Duration(time_unit="ms"),
@@ -382,7 +384,7 @@ def test_dst_offset_lazy_schema() -> None:
         eager=True,
     )
     df = pl.DataFrame({"ts": ser}).lazy()
-    result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).schema
+    result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).collect_schema()
     expected = {
         "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
         "dst_offset": pl.Duration(time_unit="ms"),

diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py
@@ -7,4 +7,4 @@
 def test_bitwise_integral_schema(op: str) -> None:
     df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
     q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
-    assert q.schema["a"] == df.schema["a"]
+    assert q.collect_schema()["a"] == df.collect_schema()["a"]
diff --git a/py-polars/tests/unit/operations/test_clear.py b/py-polars/tests/unit/operations/test_clear.py
@@ -68,10 +68,10 @@ def test_clear_lf() -> None:
         }
     )
     ldfe = lf.clear()
-    assert ldfe.schema == lf.schema
+    assert ldfe.collect_schema() == lf.collect_schema()
 
     ldfe = lf.clear(2)
-    assert ldfe.schema == lf.schema
+    assert ldfe.collect_schema() == lf.collect_schema()
     assert ldfe.collect().rows() == [(None, None, None), (None, None, None)]
 
 

diff --git a/py-polars/tests/unit/operations/test_ewm_by.py b/py-polars/tests/unit/operations/test_ewm_by.py
@@ -44,7 +44,7 @@ def test_ewma_by_date(sort: bool) -> None:
         {"values": [None, 1.0, 1.9116116523516815, None, 3.815410804703363]}
     )
     assert_frame_equal(result.collect(), expected)
-    assert result.schema["values"] == pl.Float64
+    assert result.collect_schema()["values"] == pl.Float64
     assert result.collect().schema["values"] == pl.Float64
 
 
@@ -88,7 +88,7 @@ def test_ewma_f32() -> None:
         schema_overrides={"values": pl.Float32},
     )
     assert_frame_equal(result.collect(), expected)
-    assert result.schema["values"] == pl.Float32
+    assert result.collect_schema()["values"] == pl.Float32
     assert result.collect().schema["values"] == pl.Float32
 
 
@@ -170,7 +170,7 @@ def test_ewma_by_index(data_type: PolarsIntegerType) -> None:
         {"values": [None, 1.0, 1.9116116523516815, None, 3.815410804703363]}
     )
     assert_frame_equal(result.collect(), expected)
-    assert result.schema["values"] == pl.Float64
+    assert result.collect_schema()["values"] == pl.Float64
     assert result.collect().schema["values"] == pl.Float64
 
 

diff --git a/py-polars/tests/unit/operations/test_fill_null.py b/py-polars/tests/unit/operations/test_fill_null.py
@@ -28,7 +28,7 @@ def test_fill_null_static_schema_4843() -> None:
 
     df2 = df1.select([pl.col(pl.Int64).fill_null(0)])
     df3 = df2.select(pl.col(pl.Int64))
-    assert df3.schema == {"a": pl.Int64, "b": pl.Int64}
+    assert df3.collect_schema() == {"a": pl.Int64, "b": pl.Int64}
 
 
 def test_fill_null_f32_with_lit() -> None:

diff --git a/py-polars/tests/unit/operations/test_gather.py b/py-polars/tests/unit/operations/test_gather.py
@@ -29,7 +29,7 @@ def test_gather_agg_schema() -> None:
         df.lazy()
         .group_by("group", maintain_order=True)
         .agg(pl.col("value").get(1))
-        .schema["value"]
+        .collect_schema()["value"]
         == pl.Int64
     )
 

diff --git a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py
@@ -1022,13 +1022,15 @@ def test_schema_on_agg() -> None:
         "first": pl.Int64,
         "last": pl.Int64,
     }
-    assert result.schema == expected_schema
+    assert result.collect_schema() == expected_schema
 
 
 def test_group_by_schema_err() -> None:
     lf = pl.LazyFrame({"foo": [None, 1, 2], "bar": [1, 2, 3]})
     with pytest.raises(pl.ColumnNotFoundError):
-        lf.group_by("not-existent").agg(pl.col("bar").max().alias("max_bar")).schema
+        lf.group_by("not-existent").agg(
+            pl.col("bar").max().alias("max_bar")
+        ).collect_schema()
 
 
 @pytest.mark.parametrize(