Skip to content

Commit

Permalink
fix: Fix union
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 2, 2025
1 parent 1ebd039 commit 450aee8
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 1 deletion.
2 changes: 2 additions & 0 deletions crates/polars-core/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign};

mod schema;
pub use aliases::*;
pub use any_value::*;
pub use arrow::array::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, StaticArray};
Expand All @@ -42,6 +43,7 @@ use polars_utils::abs_diff::AbsDiff;
use polars_utils::float::IsFloat;
use polars_utils::min_max::MinMax;
use polars_utils::nulls::IsNull;
pub use schema::SchemaExtPl;
#[cfg(feature = "serde")]
use serde::de::{EnumAccess, Error, Unexpected, VariantAccess, Visitor};
#[cfg(any(feature = "serde", feature = "serde-lazy"))]
Expand Down
21 changes: 21 additions & 0 deletions crates/polars-core/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use super::*;

/// Extension trait providing a schema-compatibility check; implemented for [`Schema`].
pub trait SchemaExtPl {
    /// Answers if this schema matches the given schema.
    ///
    /// Allows (nested) Null types in this schema to match any type in `other`,
    /// but not vice versa. In such a case `Ok(true)` is returned, because a cast
    /// is necessary. If no cast is necessary `Ok(false)` is returned.
    ///
    /// # Errors
    ///
    /// An error is returned if the types are incompatible.
    fn matches_schema(&self, other: &Schema) -> PolarsResult<bool>;
}

impl SchemaExtPl for Schema {
fn matches_schema(&self, other: &Schema) -> PolarsResult<bool> {
let mut cast = false;
for (a, b) in self.iter_values().zip(other.iter_values()) {
cast |= a.matches_schema_type(b)?;
}
Ok(cast)
}
}
1 change: 1 addition & 0 deletions crates/polars-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ mod tests;
use std::sync::Mutex;
use std::time::{SystemTime, UNIX_EPOCH};

pub use datatypes::SchemaExtPl;
pub use hashing::IdBuildHasher;
use once_cell::sync::Lazy;
use rayon::{ThreadPool, ThreadPoolBuilder};
Expand Down
4 changes: 3 additions & 1 deletion crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,10 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult
let schema = ctxt.lp_arena.get(first).schema(ctxt.lp_arena);
for n in &inputs[1..] {
let schema_i = ctxt.lp_arena.get(*n).schema(ctxt.lp_arena);
polars_ensure!(schema == schema_i, InvalidOperation: "'union'/'concat' inputs should all have the same schema,\
// The first argument
schema_i.matches_schema(schema.as_ref()).map_err(|_| polars_err!(InvalidOperation: "'union'/'concat' inputs should all have the same schema,\
got\n{:?} and \n{:?}", schema, schema_i)
)?;
}

let options = args.into();
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/operations/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,13 @@ def test_concat_series() -> None:
assert pl.concat([s, s]).len() == 6
# check if s remains unchanged
assert s.len() == 3


def test_concat_null_20501() -> None:
    # Lazy concat of two frames where the second frame's "value" column holds
    # only `None` must succeed and keep the first frame's values intact.
    # NOTE(review): the numeric suffix presumably refers to issue #20501 — confirm.
    frames = [
        pl.DataFrame({"id": [1], "value": ["foo"]}),
        pl.DataFrame({"id": [2], "value": [None]}),
    ]
    expected = {
        "id": [1, 2],
        "value": ["foo", None],
    }

    result = pl.concat([frame.lazy() for frame in frames]).collect()

    assert result.to_dict(as_series=False) == expected

0 comments on commit 450aee8

Please sign in to comment.