diff --git a/CHANGELOG.md b/CHANGELOG.md index 253241ca0e..1c5fa8d04d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 0.7.0 (unreleased) -- Updated Polars to 0.34.2 +- Updated Polars to 0.35.2 - Added support for SQL querying - Added `!` for `Expr` - Added `Config` module diff --git a/Cargo.lock b/Cargo.lock index 3a17e34abf..98aad5c8d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,6 +148,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atoi_simd" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc41b65e01b6851bdcd2d741824e6b310d571396bf3915e31e4792034ee65126" + [[package]] name = "autocfg" version = "1.1.0" @@ -820,15 +826,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" -[[package]] -name = "lexical" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" -dependencies = [ - "lexical-core", -] - [[package]] name = "lexical-core" version = "0.8.5" @@ -1182,25 +1179,6 @@ dependencies = [ "futures", ] -[[package]] -name = "parquet2" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579fe5745f02cef3d5f236bfed216fd4693e49e4e920a13475c6132233283bce" -dependencies = [ - "async-stream", - "brotli", - "flate2", - "futures", - "lz4", - "parquet-format-safe", - "seq-macro", - "snap", - "streaming-decompression", - "xxhash-rust", - "zstd 0.12.4", -] - [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -1297,17 +1275,18 @@ dependencies = [ "jemallocator", "magnus", "mimalloc", - "polars 0.34.2", + "polars 0.35.2", "polars-core", + "polars-parquet", "serde_json", "smartstring", ] [[package]] name = "polars" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40db657cc67a8dd9fe4b40db5b73027f5f224623545597e1930cbbb9c05b1de5" +checksum = "987f70ba422ed3f6bb566d4a02f5ad35f2cabcdc7f09705f9340a5ee2ad59215" dependencies = [ "getrandom", "polars-core", @@ -1321,45 +1300,46 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1e50c63db77f846ac5119477422f0156f0a1826ceaae7d921f9a6d5ea5f7ca3" +checksum = "682c24cbb6fc05882c0112da75ec037d0afe88241c3778c074d61211bbf063bf" dependencies = [ "ahash", "arrow-format", "atoi", + "atoi_simd", "avro-schema", - "base64", "bytemuck", "chrono", "chrono-tz", "dyn-clone", "either", "ethnum", - "fallible-streaming-iterator", + "fast-float", "foreign_vec", "futures", "getrandom", "hashbrown 0.14.0", - "lexical-core", + "itoa", "lz4", "multiversion", "num-traits", - "parquet2", "polars-error", + "polars-utils", "rustc_version", + "ryu", "serde", "simdutf8", "streaming-iterator", "strength_reduce", - "zstd 0.13.0", + "zstd", ] [[package]] name = "polars-core" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdfb622b8ca81b4614c64d95e7590d6e0571d7d398b5ad595c1abc4412abe714" +checksum = "0fcbe0039cad625b5775c8f8e0dfdd204a71968959ae006322083b3c244475bf" dependencies = [ "ahash", "bitflags 2.4.0", @@ -1391,13 +1371,12 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.34.2" +version = "0.35.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6480520ebde0b20935b600483b865513891e36c04942cebdd19e4f338257b4" +checksum = "bd5581898a3cc5d0930f40f76034a4cf9e18dc4d38e13ef769fb6f427cf63215" dependencies = [ "arrow-format", "avro-schema", - "parquet2", "regex", "simdutf8", "thiserror", @@ -1405,12 +1384,13 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "666466a3b151047c76d99b4e4e5f5438895ef97848008cf49b06df8e3d2d395a" +checksum = "9080fcc2777fcdfb859cfd45029a90096089bd0da15f4f022c23f8276bf5268c" dependencies = [ "ahash", "async-trait", + "atoi_simd", "bytes", "chrono", "chrono-tz", @@ -1418,8 +1398,6 @@ dependencies = [ "futures", "home", "itoa", - "lexical", - "lexical-core", "memchr", "memmap2", "num-traits", @@ -1429,6 +1407,7 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-parquet", "polars-time", "polars-utils", "rayon", @@ -1445,9 +1424,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24451d2647a9bd51283cc946509c23bac27130565daa5103a156c8507b85b5a3" +checksum = "4d90ab8b514897140841dcf337810d1ea5778ac260fd561896365445e595275c" dependencies = [ "ahash", "chrono", @@ -1466,9 +1445,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e1c2da1ca20106f80d9510090344e7311fd1dcfd6e6b65031e10606c0958c7" +checksum = "f20c79e04461bc3fa085c552c7c18919732ab6cde4e5da9f7b52b48d269b0f11" dependencies = [ "ahash", "bitflags 2.4.0", @@ -1490,9 +1469,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe2d37a6a3ef358499d43aecee80740e62dd44e6cfe7a9c4aa0b8db88de8292" +checksum = "82ad422e9bb854d275f8d700e0f2bbf39cd8f71709a24aa40e54f32ac7410199" dependencies = [ "ahash", "argminmax", @@ -1522,11 +1501,37 @@ dependencies = [ "version_check", ] +[[package]] +name = "polars-parquet" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9d73479f136c036d1ba87d7ea2e9ac89c86ee2c16ab0020a6e15edbb3581135" +dependencies = [ + "ahash", + "async-stream", + "base64", + "brotli", + "ethnum", + "flate2", + "futures", + "lz4", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-error", + "polars-utils", + "seq-macro", + "simdutf8", + "snap", + "streaming-decompression", + "zstd", +] + [[package]] name = "polars-pipe" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6aa050d529be01617f54bc60658149da76f97dbea9fdac3c9d60b811f64a2ba" +checksum = "869f372f023554f8a66fc06060709993b92004f2157188d467741c8fcdb226ff" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1547,9 +1552,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c47e5d62d8f612aab61a6331d04c5c95c9ff301106d8b91131c8833b4ef3def6" +checksum = "af139995be30bb6469fdff78669be5da6aad9ae7ec1ec3e538f34ebeaa8b0fb4" dependencies = [ "ahash", "bytemuck", @@ -1561,6 +1566,7 @@ dependencies = [ "polars-core", "polars-io", "polars-ops", + "polars-parquet", "polars-time", "polars-utils", "rayon", @@ -1573,9 +1579,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f05d6544f7d6065fcaa93bc69aac0532ce09aab4f81ec03c9a78dd901bb0c05b" +checksum = "7ae5284d603c3111c217d2451e1b62578eebf2f30d49c32e03790dd4f9cc21ad" dependencies = [ "polars-arrow", "polars-error", @@ -1584,9 +1590,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f65f9c8bfe7f0b2c08c38c79b92ec4ddaf213fc424d94a6272ed7b2d83987f" +checksum = "9357cc60db55c2a407230b42e3ca67ef8495513460ade6f368f48fdd4d2a2505" dependencies = [ "polars-arrow", "polars-core", @@ -1601,9 +1607,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.34.2" +version = "0.35.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3763af36aeeb85ef083f11c43bc28c5b6222e2aae039c5118d916bc855f2b5b9" +checksum = "cdbd4ad82122f9fa8bebfc40dc41183e311b0a8ce2e58f2a48800b5e1136d41e" dependencies = [ "atoi", "chrono", @@ -1622,13 +1628,14 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.34.2" +version = "0.35.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55d2c038ff67e4eb6019682c3f66d83f744e285de9c28e816109a61bace824cd" +checksum = "c18e77b1bddfe72ff522bfedf5b1bf1e9d9651b1e6a22f86c50022d1aab2b651" dependencies = [ "ahash", "bytemuck", "hashbrown 0.14.0", + "indexmap", "num-traits", "once_cell", "polars-error", @@ -1761,6 +1768,26 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "ref-cast" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acde58d073e9c79da00f2b5b84eed919c8326832648a5b109b3fce1bb1175280" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.36", +] + [[package]] name = "regex" version = "1.9.5" @@ -1923,15 +1950,16 @@ dependencies = [ [[package]] name = "simd-json" -version = "0.12.0" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f07a84c7456b901b8dd2c1d44caca8b0fd2c2616206ee5acc9d9da61e8d9ec" +checksum = "e5a3720326b20bf5b95b72dbbd133caae7e0dcf71eae8f6e6656e71a7e5c9aaa" dependencies = [ "ahash", "getrandom", "halfbrown", "lexical-core", "once_cell", + "ref-cast", "serde", "serde_json", "simdutf8", @@ -1995,9 +2023,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", ] @@ -2166,9 +2194,9 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "value-trait" -version = "0.6.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a5b6c8ceb01263b969cac48d4a6705134d490ded13d889e52c0cfc80c6945e" +checksum = "ea87257cfcbedcb9444eda79c59fdfea71217e6305afee8ee33f500375c2ac97" dependencies = [ "float-cmp", "halfbrown", @@ -2365,32 +2393,13 @@ dependencies = [ "syn 2.0.36", ] -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", -] - [[package]] name = "zstd" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ - "zstd-safe 7.0.0", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/ext/polars/Cargo.toml b/ext/polars/Cargo.toml index f78bf8381b..1f338826dd 100644 --- a/ext/polars/Cargo.toml +++ b/ext/polars/Cargo.toml @@ -15,12 +15,13 @@ ahash = "0.8" chrono = "0.4" either = "1.8" magnus = "0.6" -polars-core = "=0.34.2" +polars-core = "=0.35.2" +polars-parquet = "=0.35.2" serde_json = "1" smartstring = "1" [dependencies.polars] -version = "=0.34.2" +version = "=0.35.2" features = [ "abs", "approx_unique", @@ -29,6 +30,7 @@ features = [ "avro", "binary_encoding", "concat_str", + "cov", "cse", "csv", "cum_agg", @@ -55,7 +57,7 @@ features = [ "lazy_regex", "list_count", "list_eval", - "list_take", + "list_gather", "list_to_struct", "log", "meta", @@ -84,8 +86,8 @@ features = [ "sign", "sql", "string_encoding", - "string_from_radix", "string_pad", + "string_to_integer", "strings", "timezones", "to_dummies", diff --git a/ext/polars/src/dataframe.rs b/ext/polars/src/dataframe.rs index d02a4a7033..4e7b89fa7e 100644 --- a/ext/polars/src/dataframe.rs +++ b/ext/polars/src/dataframe.rs @@ -419,7 +419,7 @@ impl RbDataFrame { pub fn write_csv( &self, rb_f: Value, - has_header: bool, + include_header: bool, separator: u8, quote_char: u8, batch_size: usize, @@ -435,7 +435,7 @@ impl RbDataFrame { let f = std::fs::File::create(s).unwrap(); // no need for a buffered writer, because the csv writer does internal buffering CsvWriter::new(f) - .has_header(has_header) + .include_header(include_header) .with_separator(separator) .with_quote_char(quote_char) .with_batch_size(batch_size) @@ -449,7 +449,7 @@ impl RbDataFrame { } else { let mut buf = Cursor::new(Vec::new()); CsvWriter::new(&mut buf) - .has_header(has_header) + .include_header(include_header) .with_separator(separator) .with_quote_char(quote_char) .with_batch_size(batch_size) @@ -994,30 +994,48 @@ impl RbDataFrame { self.df.borrow().median().into() } - pub fn hmean(&self, null_strategy: Wrap) -> RbResult> { + pub fn max_horizontal(&self) -> RbResult> { let s = self .df .borrow() - .hmean(null_strategy.0) + .max_horizontal() .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } - pub fn hmax(&self) -> RbResult> { - let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?; + pub fn min_horizontal(&self) -> RbResult> { + let s = self + .df + .borrow() + .min_horizontal() + .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } - pub fn hmin(&self) -> RbResult> { - let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?; + pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult> { + let null_strategy = if ignore_nulls { + NullStrategy::Ignore + } else { + NullStrategy::Propagate + }; + let s = self + .df + .borrow() + .sum_horizontal(null_strategy) + .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } - pub fn hsum(&self, null_strategy: Wrap) -> RbResult> { + pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult> { + let null_strategy = if ignore_nulls { + NullStrategy::Ignore + } else { + NullStrategy::Propagate + }; let s = self .df .borrow() - .hsum(null_strategy.0) + .mean_horizontal(null_strategy) .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } diff --git a/ext/polars/src/expr/datetime.rs b/ext/polars/src/expr/datetime.rs index d63c3b2c55..07a7e96200 100644 --- a/ext/polars/src/expr/datetime.rs +++ b/ext/polars/src/expr/datetime.rs @@ -45,11 +45,11 @@ impl RbExpr { .into() } - pub fn dt_truncate(&self, every: &Self, offset: String, ambiguous: &Self) -> Self { + pub fn dt_truncate(&self, every: &Self, offset: String) -> Self { self.inner .clone() .dt() - .truncate(every.inner.clone(), offset, ambiguous.inner.clone()) + .truncate(every.inner.clone(), offset) .into() } @@ -61,12 +61,8 @@ impl RbExpr { self.inner.clone().dt().month_end().into() } - pub fn dt_round(&self, every: String, offset: String, ambiguous: &Self) -> Self { - self.inner - .clone() - .dt() - .round(&every, &offset, ambiguous.inner.clone()) - .into() + pub fn dt_round(&self, every: String, offset: String) -> Self { + self.inner.clone().dt().round(&every, &offset).into() } pub fn dt_combine(&self, time: &Self, time_unit: Wrap) -> Self { diff --git a/ext/polars/src/expr/general.rs b/ext/polars/src/expr/general.rs index f7a089e3e8..45485afbc0 100644 --- a/ext/polars/src/expr/general.rs +++ b/ext/polars/src/expr/general.rs @@ -250,8 +250,8 @@ impl RbExpr { .into() } - pub fn take(&self, idx: &RbExpr) -> Self { - self.clone().inner.take(idx.inner.clone()).into() + pub fn gather(&self, idx: &RbExpr) -> Self { + self.clone().inner.gather(idx.inner.clone()).into() } pub fn sort_by(&self, by: RArray, reverse: Vec) -> RbResult { @@ -267,14 +267,13 @@ impl RbExpr { self.clone().inner.forward_fill(limit).into() } - pub fn shift(&self, periods: i64) -> Self { - self.clone().inner.shift(periods).into() - } - pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self { - self.clone() - .inner - .shift_and_fill(periods, fill_value.inner.clone()) - .into() + pub fn shift(&self, n: &Self, fill_value: Option<&Self>) -> Self { + let expr = self.inner.clone(); + let out = match fill_value { + Some(v) => expr.shift_and_fill(n.inner.clone(), v.inner.clone()), + None => expr.shift(n.inner.clone()), + }; + out.into() } pub fn fill_null(&self, expr: &RbExpr) -> Self { @@ -346,14 +345,14 @@ impl RbExpr { self.clone().inner.explode().into() } - pub fn take_every(&self, n: usize) -> Self { + pub fn gather_every(&self, n: usize) -> Self { self.clone() .inner .map( - move |s: Series| Ok(Some(s.take_every(n))), + move |s: Series| Ok(Some(s.gather_every(n))), GetOutput::same_type(), ) - .with_fmt("take_every") + .with_fmt("gather_every") .into() } @@ -498,20 +497,20 @@ impl RbExpr { self.clone().inner.pow(exponent.inner.clone()).into() } - pub fn cumsum(&self, reverse: bool) -> Self { - self.clone().inner.cumsum(reverse).into() + pub fn cum_sum(&self, reverse: bool) -> Self { + self.clone().inner.cum_sum(reverse).into() } - pub fn cummax(&self, reverse: bool) -> Self { - self.clone().inner.cummax(reverse).into() + pub fn cum_max(&self, reverse: bool) -> Self { + self.clone().inner.cum_max(reverse).into() } - pub fn cummin(&self, reverse: bool) -> Self { - self.clone().inner.cummin(reverse).into() + pub fn cum_min(&self, reverse: bool) -> Self { + self.clone().inner.cum_min(reverse).into() } - pub fn cumprod(&self, reverse: bool) -> Self { - self.clone().inner.cumprod(reverse).into() + pub fn cum_prod(&self, reverse: bool) -> Self { + self.clone().inner.cum_prod(reverse).into() } pub fn product(&self) -> Self { @@ -598,8 +597,8 @@ impl RbExpr { self.inner.clone().reshape(&dims).into() } - pub fn cumcount(&self, reverse: bool) -> Self { - self.inner.clone().cumcount(reverse).into() + pub fn cum_count(&self, reverse: bool) -> Self { + self.inner.clone().cum_count(reverse).into() } pub fn to_physical(&self) -> Self { diff --git a/ext/polars/src/expr/string.rs b/ext/polars/src/expr/string.rs index 0e9570f9cf..efb3960de2 100644 --- a/ext/polars/src/expr/string.rs +++ b/ext/polars/src/expr/string.rs @@ -4,8 +4,12 @@ use crate::conversion::Wrap; use crate::RbExpr; impl RbExpr { - pub fn str_concat(&self, delimiter: String) -> Self { - self.inner.clone().str().concat(&delimiter).into() + pub fn str_concat(&self, delimiter: String, ignore_nulls: bool) -> Self { + self.inner + .clone() + .str() + .concat(&delimiter, ignore_nulls) + .into() } pub fn str_to_date( @@ -232,11 +236,11 @@ impl RbExpr { .into() } - pub fn str_parse_int(&self, radix: u32, strict: bool) -> Self { + pub fn str_to_integer(&self, base: u32, strict: bool) -> Self { self.inner .clone() .str() - .from_radix(radix, strict) + .to_integer(base, strict) .with_fmt("str.parse_int") .into() } diff --git a/ext/polars/src/functions/aggregation.rs b/ext/polars/src/functions/aggregation.rs index 6a994f748c..93b4e713a4 100644 --- a/ext/polars/src/functions/aggregation.rs +++ b/ext/polars/src/functions/aggregation.rs @@ -2,29 +2,34 @@ use magnus::RArray; use polars::lazy::dsl; use crate::rb_exprs_to_exprs; -use crate::{RbExpr, RbResult}; +use crate::{RbExpr, RbPolarsErr, RbResult}; pub fn all_horizontal(exprs: RArray) -> RbResult { let exprs = rb_exprs_to_exprs(exprs)?; - Ok(dsl::all_horizontal(exprs).into()) + let e = dsl::all_horizontal(exprs).map_err(RbPolarsErr::from)?; + Ok(e.into()) } pub fn any_horizontal(exprs: RArray) -> RbResult { let exprs = rb_exprs_to_exprs(exprs)?; - Ok(dsl::any_horizontal(exprs).into()) + let e = dsl::any_horizontal(exprs).map_err(RbPolarsErr::from)?; + Ok(e.into()) } pub fn max_horizontal(exprs: RArray) -> RbResult { let exprs = rb_exprs_to_exprs(exprs)?; - Ok(dsl::max_horizontal(exprs).into()) + let e = dsl::max_horizontal(exprs).map_err(RbPolarsErr::from)?; + Ok(e.into()) } pub fn min_horizontal(exprs: RArray) -> RbResult { let exprs = rb_exprs_to_exprs(exprs)?; - Ok(dsl::min_horizontal(exprs).into()) + let e = dsl::min_horizontal(exprs).map_err(RbPolarsErr::from)?; + Ok(e.into()) } pub fn sum_horizontal(exprs: RArray) -> RbResult { let exprs = rb_exprs_to_exprs(exprs)?; - Ok(dsl::sum_horizontal(exprs).into()) + let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?; + Ok(e.into()) } diff --git a/ext/polars/src/functions/io.rs b/ext/polars/src/functions/io.rs index 33b77cc040..b96eb2842a 100644 --- a/ext/polars/src/functions/io.rs +++ b/ext/polars/src/functions/io.rs @@ -11,15 +11,15 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult { let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?; let dict = RHash::new(); - for field in metadata.schema.fields { + for field in &metadata.schema.fields { let dt: Wrap = Wrap((&field.data_type).into()); - dict.aset(field.name, dt)?; + dict.aset(field.name.clone(), dt)?; } Ok(dict) } pub fn read_parquet_schema(rb_f: Value) -> RbResult { - use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata}; + use polars_parquet::read::{infer_schema, read_metadata}; let mut r = get_file_like(rb_f, false)?; let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?; diff --git a/ext/polars/src/functions/lazy.rs b/ext/polars/src/functions/lazy.rs index ac326f4ff2..b42e0cfd5b 100644 --- a/ext/polars/src/functions/lazy.rs +++ b/ext/polars/src/functions/lazy.rs @@ -177,7 +177,7 @@ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) - let func = move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b); - Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into()) + Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into()) } pub fn lit(value: Value, allow_object: bool) -> RbResult { @@ -249,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool .into() } -pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr { - polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into() +pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr { + polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into() } pub fn concat_str(s: RArray, sep: String) -> RbResult { diff --git a/ext/polars/src/lazyframe.rs b/ext/polars/src/lazyframe.rs index 30ff47c42c..e2a7018e85 100644 --- a/ext/polars/src/lazyframe.rs +++ b/ext/polars/src/lazyframe.rs @@ -219,6 +219,7 @@ impl RbLazyFrame { slice_pushdown: bool, cse: bool, allow_streaming: bool, + _eager: bool, ) -> RbLazyFrame { let ldf = self.ldf.clone(); let ldf = ldf @@ -228,6 +229,7 @@ impl RbLazyFrame { .with_slice_pushdown(slice_pushdown) .with_comm_subplan_elim(cse) .with_streaming(allow_streaming) + ._with_eager(_eager) .with_projection_pushdown(projection_pushdown); ldf.into() } @@ -494,14 +496,13 @@ impl RbLazyFrame { ldf.reverse().into() } - pub fn shift(&self, periods: i64) -> Self { - let ldf = self.ldf.clone(); - ldf.shift(periods).into() - } - - pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self { - let ldf = self.ldf.clone(); - ldf.shift_and_fill(periods, fill_value.inner.clone()).into() + pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self { + let lf = self.ldf.clone(); + let out = match fill_value { + Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()), + None => lf.shift(n.inner.clone()), + }; + out.into() } pub fn fill_nan(&self, fill_value: &RbExpr) -> Self { diff --git a/ext/polars/src/lib.rs b/ext/polars/src/lib.rs index fd876bcf34..ec7b3948de 100644 --- a/ext/polars/src/lib.rs +++ b/ext/polars/src/lib.rs @@ -257,10 +257,10 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("std", method!(RbDataFrame::std, 1))?; class.define_method("var", method!(RbDataFrame::var, 1))?; class.define_method("median", method!(RbDataFrame::median, 0))?; - class.define_method("hmean", method!(RbDataFrame::hmean, 1))?; - class.define_method("hmax", method!(RbDataFrame::hmax, 0))?; - class.define_method("hmin", method!(RbDataFrame::hmin, 0))?; - class.define_method("hsum", method!(RbDataFrame::hsum, 1))?; + class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?; + class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?; + class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?; + class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?; class.define_method("quantile", method!(RbDataFrame::quantile, 2))?; class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?; class.define_method("null_count", method!(RbDataFrame::null_count, 0))?; @@ -324,12 +324,11 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("arg_max", method!(RbExpr::arg_max, 0))?; class.define_method("arg_min", method!(RbExpr::arg_min, 0))?; class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?; - class.define_method("take", method!(RbExpr::take, 1))?; + class.define_method("gather", method!(RbExpr::gather, 1))?; class.define_method("sort_by", method!(RbExpr::sort_by, 2))?; class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?; class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?; - class.define_method("shift", method!(RbExpr::shift, 1))?; - class.define_method("shift_and_fill", method!(RbExpr::shift_and_fill, 2))?; + class.define_method("shift", method!(RbExpr::shift, 2))?; class.define_method("fill_null", method!(RbExpr::fill_null, 1))?; class.define_method( "fill_null_with_strategy", @@ -347,7 +346,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?; class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?; class.define_method("explode", method!(RbExpr::explode, 0))?; - class.define_method("take_every", method!(RbExpr::take_every, 1))?; + class.define_method("gather_every", method!(RbExpr::gather_every, 1))?; class.define_method("tail", method!(RbExpr::tail, 1))?; class.define_method("head", method!(RbExpr::head, 1))?; class.define_method("slice", method!(RbExpr::slice, 2))?; @@ -379,10 +378,10 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("is_in", method!(RbExpr::is_in, 1))?; class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?; class.define_method("pow", method!(RbExpr::pow, 1))?; - class.define_method("cumsum", method!(RbExpr::cumsum, 1))?; - class.define_method("cummax", method!(RbExpr::cummax, 1))?; - class.define_method("cummin", method!(RbExpr::cummin, 1))?; - class.define_method("cumprod", method!(RbExpr::cumprod, 1))?; + class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?; + class.define_method("cum_max", method!(RbExpr::cum_max, 1))?; + class.define_method("cum_min", method!(RbExpr::cum_min, 1))?; + class.define_method("cum_prod", method!(RbExpr::cum_prod, 1))?; class.define_method("product", method!(RbExpr::product, 0))?; class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?; class.define_method("str_to_date", method!(RbExpr::str_to_date, 4))?; @@ -423,7 +422,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?; class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?; class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?; - class.define_method("str_parse_int", method!(RbExpr::str_parse_int, 2))?; + class.define_method("str_to_integer", method!(RbExpr::str_to_integer, 2))?; class.define_method("str_json_extract", method!(RbExpr::str_json_extract, 2))?; class.define_method("binary_hex_encode", method!(RbExpr::bin_hex_encode, 0))?; class.define_method("binary_hex_decode", method!(RbExpr::bin_hex_decode, 1))?; @@ -504,10 +503,10 @@ fn init(ruby: &Ruby) -> RbResult<()> { "dt_replace_time_zone", method!(RbExpr::dt_replace_time_zone, 2), )?; - class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 3))?; + class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?; class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?; class.define_method("dt_month_end", method!(RbExpr::dt_month_end, 0))?; - class.define_method("dt_round", method!(RbExpr::dt_round, 3))?; + class.define_method("dt_round", method!(RbExpr::dt_round, 2))?; class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?; class.define_method("map", method!(RbExpr::map, 3))?; class.define_method("dot", method!(RbExpr::dot, 1))?; @@ -549,10 +548,10 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("pct_change", method!(RbExpr::pct_change, 1))?; class.define_method("skew", method!(RbExpr::skew, 1))?; class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?; - class.define_method("str_concat", method!(RbExpr::str_concat, 1))?; + class.define_method("str_concat", method!(RbExpr::str_concat, 2))?; class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?; class.define_method("reshape", method!(RbExpr::reshape, 1))?; - class.define_method("cumcount", method!(RbExpr::cumcount, 1))?; + class.define_method("cum_count", method!(RbExpr::cum_count, 1))?; class.define_method("to_physical", method!(RbExpr::to_physical, 0))?; class.define_method("shuffle", method!(RbExpr::shuffle, 1))?; class.define_method("sample_n", method!(RbExpr::sample_n, 4))?; @@ -630,7 +629,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { "spearman_rank_corr", function!(crate::functions::lazy::spearman_rank_corr, 4), )?; - class.define_singleton_method("cov", function!(crate::functions::lazy::cov, 2))?; + class.define_singleton_method("cov", function!(crate::functions::lazy::cov, 3))?; class.define_singleton_method( "arg_sort_by", function!(crate::functions::lazy::arg_sort_by, 2), @@ -665,7 +664,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { )?; class.define_method( "optimization_toggle", - method!(RbLazyFrame::optimization_toggle, 7), + method!(RbLazyFrame::optimization_toggle, 8), )?; class.define_method("sort", method!(RbLazyFrame::sort, 4))?; class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 4))?; @@ -690,8 +689,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?; class.define_method("rename", method!(RbLazyFrame::rename, 2))?; class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?; - class.define_method("shift", method!(RbLazyFrame::shift, 1))?; - class.define_method("shift_and_fill", method!(RbLazyFrame::shift_and_fill, 2))?; + class.define_method("shift", method!(RbLazyFrame::shift, 2))?; class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?; class.define_method("min", method!(RbLazyFrame::min, 0))?; class.define_method("max", method!(RbLazyFrame::max, 0))?; diff --git a/lib/polars/data_frame.rb b/lib/polars/data_frame.rb index 42252b3a4b..9b110f660e 100644 --- a/lib/polars/data_frame.rb +++ b/lib/polars/data_frame.rb @@ -899,6 +899,7 @@ def write_ndjson(file) def write_csv( file = nil, has_header: true, + include_header: nil, sep: ",", quote: '"', batch_size: 1024, @@ -908,6 +909,8 @@ def write_csv( float_precision: nil, null_value: nil ) + include_header = has_header if include_header.nil? + if sep.length > 1 raise ArgumentError, "only single byte separator is allowed" elsif quote.length > 1 @@ -921,7 +924,7 @@ def write_csv( buffer.set_encoding(Encoding::BINARY) _df.write_csv( buffer, - has_header, + include_header, sep.ord, quote.ord, batch_size, @@ -940,7 +943,7 @@ def write_csv( _df.write_csv( file, - has_header, + include_header, sep.ord, quote.ord, batch_size, @@ -3447,8 +3450,10 @@ def partition_by(groups, maintain_order: true, include_key: true, as_dict: false # Shift values by the given period. # - # @param periods [Integer] + # @param n [Integer] # Number of places to shift (may be negative). + # @param fill_value [Object] + # Fill the resulting null values with this value. # # @return [DataFrame] # @@ -3486,8 +3491,8 @@ def partition_by(groups, maintain_order: true, include_key: true, as_dict: false # # │ 3 ┆ 8 ┆ c │ # # │ null ┆ null ┆ null │ # # └──────┴──────┴──────┘ - def shift(periods) - _from_rbdf(_df.shift(periods)) + def shift(n, fill_value: nil) + lazy.shift(n, fill_value: fill_value).collect(_eager: true) end # Shift the values by a given period and fill the resulting null values. @@ -3520,9 +3525,7 @@ def shift(periods) # # │ 2 ┆ 7 ┆ b │ # # └─────┴─────┴─────┘ def shift_and_fill(periods, fill_value) - lazy - .shift_and_fill(periods, fill_value) - .collect(no_optimization: true, string_cache: false) + shift(periods, fill_value: fill_value) end # Get a mask of all duplicated rows in this DataFrame. @@ -3773,7 +3776,7 @@ def max(axis: 0) if axis == 0 _from_rbdf(_df.max) elsif axis == 1 - Utils.wrap_s(_df.hmax) + Utils.wrap_s(_df.max_horizontal) else raise ArgumentError, "Axis should be 0 or 1." end @@ -3805,7 +3808,7 @@ def min(axis: 0) if axis == 0 _from_rbdf(_df.min) elsif axis == 1 - Utils.wrap_s(_df.hmin) + Utils.wrap_s(_df.min_horizontal) else raise ArgumentError, "Axis should be 0 or 1." end @@ -3854,7 +3857,7 @@ def sum(axis: 0, null_strategy: "ignore") when 0 _from_rbdf(_df.sum) when 1 - Utils.wrap_s(_df.hsum(null_strategy)) + Utils.wrap_s(_df.sum_horizontal(null_strategy)) else raise ArgumentError, "Axis should be 0 or 1." end @@ -3892,7 +3895,7 @@ def mean(axis: 0, null_strategy: "ignore") when 0 _from_rbdf(_df.mean) when 1 - Utils.wrap_s(_df.hmean(null_strategy)) + Utils.wrap_s(_df.mean_horizontal(null_strategy)) else raise ArgumentError, "Axis should be 0 or 1." end @@ -4589,7 +4592,7 @@ def shrink_to_fit(in_place: false) # # @example # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]}) - # s.take_every(2) + # s.gather_every(2) # # => # # shape: (2, 2) # # ┌─────┬─────┐ @@ -4600,9 +4603,10 @@ def shrink_to_fit(in_place: false) # # │ 1 ┆ 5 │ # # │ 3 ┆ 7 │ # # └─────┴─────┘ - def take_every(n) - select(Utils.col("*").take_every(n)) + def gather_every(n) + select(Utils.col("*").gather_every(n)) end + alias_method :take_every, :gather_every # Hash and combine the rows in this DataFrame. # diff --git a/lib/polars/date_time_expr.rb b/lib/polars/date_time_expr.rb index a94311c2ec..3ec2105f6c 100644 --- a/lib/polars/date_time_expr.rb +++ b/lib/polars/date_time_expr.rb @@ -97,9 +97,7 @@ def initialize(expr) # # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │ # # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │ # # └─────────────────────┴─────────────────────┘ - def truncate(every, offset: nil, use_earliest: nil, ambiguous: "raise") - ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous) - ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr) + def truncate(every, offset: nil, use_earliest: nil) if offset.nil? offset = "0ns" end @@ -113,7 +111,6 @@ def truncate(every, offset: nil, use_earliest: nil, ambiguous: "raise") _rbexpr.dt_truncate( every, Utils._timedelta_to_pl_duration(offset), - ambiguous._rbexpr ) ) end @@ -213,18 +210,15 @@ def truncate(every, offset: nil, use_earliest: nil, ambiguous: "raise") # # │ 2001-01-01 00:50:00 ┆ 2001-01-01 01:00:00 │ # # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │ # # └─────────────────────┴─────────────────────┘ - def round(every, offset: nil, ambiguous: "raise") + def round(every, offset: nil) if offset.nil? offset = "0ns" end - ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr) - Utils.wrap_expr( _rbexpr.dt_round( Utils._timedelta_to_pl_duration(every), - Utils._timedelta_to_pl_duration(offset), - ambiguous._rbexpr + Utils._timedelta_to_pl_duration(offset) ) ) end diff --git a/lib/polars/expr.rb b/lib/polars/expr.rb index 5e68cfe100..6ac1206db4 100644 --- a/lib/polars/expr.rb +++ b/lib/polars/expr.rb @@ -912,8 +912,8 @@ def drop_nans # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]}) # df.select( # [ - # Polars.col("a").cumsum, - # Polars.col("a").cumsum(reverse: true).alias("a_reverse") + # Polars.col("a").cum_sum, + # Polars.col("a").cum_sum(reverse: true).alias("a_reverse") # ] # ) # # => @@ -928,9 +928,10 @@ def drop_nans # # │ 6 ┆ 7 │ # # │ 10 ┆ 4 │ # # └─────┴───────────┘ - def cumsum(reverse: false) - wrap_expr(_rbexpr.cumsum(reverse)) + def cum_sum(reverse: false) + wrap_expr(_rbexpr.cum_sum(reverse)) end + alias_method :cumsum, :cum_sum # Get an array with the cumulative product computed at every element. # @@ -947,8 +948,8 @@ def cumsum(reverse: false) # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]}) # df.select( # [ - # Polars.col("a").cumprod, - # Polars.col("a").cumprod(reverse: true).alias("a_reverse") + # Polars.col("a").cum_prod, + # Polars.col("a").cum_prod(reverse: true).alias("a_reverse") # ] # ) # # => @@ -963,9 +964,10 @@ def cumsum(reverse: false) # # │ 6 ┆ 12 │ # # │ 24 ┆ 4 │ # # └─────┴───────────┘ - def cumprod(reverse: false) - wrap_expr(_rbexpr.cumprod(reverse)) + def cum_prod(reverse: false) + wrap_expr(_rbexpr.cum_prod(reverse)) end + alias_method :cumprod, :cum_prod # Get an array with the cumulative min computed at every element. # @@ -978,8 +980,8 @@ def cumprod(reverse: false) # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]}) # df.select( # [ - # Polars.col("a").cummin, - # Polars.col("a").cummin(reverse: true).alias("a_reverse") + # Polars.col("a").cum_min, + # Polars.col("a").cum_min(reverse: true).alias("a_reverse") # ] # ) # # => @@ -994,9 +996,10 @@ def cumprod(reverse: false) # # │ 1 ┆ 3 │ # # │ 1 ┆ 4 │ # # └─────┴───────────┘ - def cummin(reverse: false) - wrap_expr(_rbexpr.cummin(reverse)) + def cum_min(reverse: false) + wrap_expr(_rbexpr.cum_min(reverse)) end + alias_method :cummin, :cum_min # Get an array with the cumulative max computed at every element. # @@ -1009,8 +1012,8 @@ def cummin(reverse: false) # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]}) # df.select( # [ - # Polars.col("a").cummax, - # Polars.col("a").cummax(reverse: true).alias("a_reverse") + # Polars.col("a").cum_max, + # Polars.col("a").cum_max(reverse: true).alias("a_reverse") # ] # ) # # => @@ -1025,9 +1028,10 @@ def cummin(reverse: false) # # │ 3 ┆ 4 │ # # │ 4 ┆ 4 │ # # └─────┴───────────┘ - def cummax(reverse: false) - wrap_expr(_rbexpr.cummax(reverse)) + def cum_max(reverse: false) + wrap_expr(_rbexpr.cum_max(reverse)) end + alias_method :cummax, :cum_max # Get an array with the cumulative count computed at every element. # @@ -1042,8 +1046,8 @@ def cummax(reverse: false) # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]}) # df.select( # [ - # Polars.col("a").cumcount, - # Polars.col("a").cumcount(reverse: true).alias("a_reverse") + # Polars.col("a").cum_count, + # Polars.col("a").cum_count(reverse: true).alias("a_reverse") # ] # ) # # => @@ -1058,9 +1062,10 @@ def cummax(reverse: false) # # │ 2 ┆ 1 │ # # │ 3 ┆ 0 │ # # └─────┴───────────┘ - def cumcount(reverse: false) - wrap_expr(_rbexpr.cumcount(reverse)) + def cum_count(reverse: false) + wrap_expr(_rbexpr.cum_count(reverse)) end + alias_method :cumcount, :cum_count # Rounds down to the nearest integer value. # @@ -1586,19 +1591,22 @@ def sort_by(by, reverse: false) # # │ one ┆ [2, 98] │ # # │ two ┆ [4, 99] │ # # └───────┴───────────┘ - def take(indices) + def gather(indices) if indices.is_a?(::Array) indices_lit = Polars.lit(Series.new("", indices, dtype: :u32)) else indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false) end - wrap_expr(_rbexpr.take(indices_lit._rbexpr)) + wrap_expr(_rbexpr.gather(indices_lit._rbexpr)) end + alias_method :take, :gather # Shift the values by a given period. # - # @param periods [Integer] + # @param n [Integer] # Number of places to shift (may be negative). + # @param fill_value [Object] + # Fill the resulting null values with this value. # # @return [Expr] # @@ -1617,8 +1625,12 @@ def take(indices) # # │ 2 │ # # │ 3 │ # # └──────┘ - def shift(periods = 1) - wrap_expr(_rbexpr.shift(periods)) + def shift(n = 1, fill_value: nil) + if !fill_value.nil? + fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true) + end + n = Utils.parse_as_expression(n) + wrap_expr(_rbexpr.shift(n, fill_value)) end # Shift the values by a given period and fill the resulting null values. @@ -1646,8 +1658,7 @@ def shift(periods = 1) # # │ 3 │ # # └─────┘ def shift_and_fill(periods, fill_value) - fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true) - wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr)) + shift(periods, fill_value: fill_value) end # Fill null values using the specified value or strategy. @@ -2728,7 +2739,7 @@ def explode # # @example # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]}) - # df.select(Polars.col("foo").take_every(3)) + # df.select(Polars.col("foo").gather_every(3)) # # => # # shape: (3, 1) # # ┌─────┐ @@ -2740,9 +2751,10 @@ def explode # # │ 4 │ # # │ 7 │ # # └─────┘ - def take_every(n) - wrap_expr(_rbexpr.take_every(n)) + def gather_every(n) + wrap_expr(_rbexpr.gather_every(n)) end + alias_method :take_every, :gather_every # Get the first `n` rows. # diff --git a/lib/polars/lazy_frame.rb b/lib/polars/lazy_frame.rb index bea9b5cef3..8f4759134b 100644 --- a/lib/polars/lazy_frame.rb +++ b/lib/polars/lazy_frame.rb @@ -352,6 +352,7 @@ def describe_optimized_plan( slice_pushdown, common_subplan_elimination, allow_streaming, + false ) ldf.describe_optimized_plan @@ -468,7 +469,8 @@ def collect( no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, - allow_streaming: false + allow_streaming: false, + _eager: false ) if no_optimization predicate_pushdown = false @@ -488,7 +490,8 @@ def collect( simplify_expression, slice_pushdown, common_subplan_elimination, - allow_streaming + allow_streaming, + _eager ) Utils.wrap_df(ldf.collect) end @@ -570,7 +573,8 @@ def sink_parquet( simplify_expression, slice_pushdown, false, - true + true, + false ) lf.sink_parquet( path, @@ -662,7 +666,8 @@ def fetch( simplify_expression, slice_pushdown, common_subplan_elimination, - allow_streaming + allow_streaming, + false ) Utils.wrap_df(ldf.fetch(n_rows)) end @@ -1744,8 +1749,10 @@ def reverse # Shift the values by a given period. # - # @param periods [Integer] + # @param n [Integer] # Number of places to shift (may be negative). + # @param fill_value [Object] + # Fill the resulting null values with this value. # # @return [LazyFrame] # @@ -1782,8 +1789,12 @@ def reverse # # │ 5 ┆ 6 │ # # │ null ┆ null │ # # └──────┴──────┘ - def shift(periods) - _from_rbldf(_ldf.shift(periods)) + def shift(n, fill_value: nil) + if !fill_value.nil? + fill_value = Utils.parse_as_expression(fill_value, str_as_lit: true) + end + n = Utils.parse_as_expression(n) + _from_rbldf(_ldf.shift(n, fill_value)) end # Shift the values by a given period and fill the resulting null values. @@ -1829,10 +1840,7 @@ def shift(periods) # # │ 0 ┆ 0 │ # # └─────┴─────┘ def shift_and_fill(periods, fill_value) - if !fill_value.is_a?(Expr) - fill_value = Polars.lit(fill_value) - end - _from_rbldf(_ldf.shift_and_fill(periods, fill_value._rbexpr)) + shift(periods, fill_value: fill_value) end # Get a slice of this DataFrame. diff --git a/lib/polars/lazy_functions.rb b/lib/polars/lazy_functions.rb index 4aa5b74e0e..5583012e34 100644 --- a/lib/polars/lazy_functions.rb +++ b/lib/polars/lazy_functions.rb @@ -931,7 +931,8 @@ def collect_all( simplify_expression, slice_pushdown, common_subplan_elimination, - allow_streaming + allow_streaming, + false ) prepared << ldf end diff --git a/lib/polars/string_expr.rb b/lib/polars/string_expr.rb index ff0afb0e71..1aac5c05e9 100644 --- a/lib/polars/string_expr.rb +++ b/lib/polars/string_expr.rb @@ -279,6 +279,8 @@ def n_chars # # @param delimiter [String] # The delimiter to insert between consecutive string values. + # @param ignore_nulls [Boolean] + # Ignore null values (default). # # @return [Expr] # @@ -287,15 +289,28 @@ def n_chars # df.select(Polars.col("foo").str.concat("-")) # # => # # shape: (1, 1) - # # ┌──────────┐ - # # │ foo │ - # # │ --- │ - # # │ str │ - # # ╞══════════╡ - # # │ 1-null-2 │ - # # └──────────┘ - def concat(delimiter = "-") - Utils.wrap_expr(_rbexpr.str_concat(delimiter)) + # # ┌─────┐ + # # │ foo │ + # # │ --- │ + # # │ str │ + # # ╞═════╡ + # # │ 1-2 │ + # # └─────┘ + # + # @example + # df = Polars::DataFrame.new({"foo" => [1, nil, 2]}) + # df.select(Polars.col("foo").str.concat("-", ignore_nulls: false)) + # # => + # # shape: (1, 1) + # # ┌──────┐ + # # │ foo │ + # # │ --- │ + # # │ str │ + # # ╞══════╡ + # # │ null │ + # # └──────┘ + def concat(delimiter = "-", ignore_nulls: true) + Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls)) end # Transform to uppercase variant. @@ -1100,6 +1115,52 @@ def explode Utils.wrap_expr(_rbexpr.str_explode) end + # Convert an Utf8 column into an Int64 column with base radix. + # + # @param base [Integer] + # Positive integer which is the base of the string we are parsing. + # Default: 10. + # @param strict [Boolean] + # Bool, default=true will raise any ParseError or overflow as ComputeError. + # false silently convert to Null. + # + # @return [Expr] + # + # @example + # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]}) + # df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed")) + # # => + # # shape: (4, 2) + # # ┌─────────┬────────┐ + # # │ bin ┆ parsed │ + # # │ --- ┆ --- │ + # # │ str ┆ i64 │ + # # ╞═════════╪════════╡ + # # │ 110 ┆ 6 │ + # # │ 101 ┆ 5 │ + # # │ 010 ┆ 2 │ + # # │ invalid ┆ null │ + # # └─────────┴────────┘ + # + # @example + # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]}) + # df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed")) + # # => + # # shape: (4, 2) + # # ┌──────┬────────┐ + # # │ hex ┆ parsed │ + # # │ --- ┆ --- │ + # # │ str ┆ i64 │ + # # ╞══════╪════════╡ + # # │ fa1e ┆ 64030 │ + # # │ ff00 ┆ 65280 │ + # # │ cafe ┆ 51966 │ + # # │ null ┆ null │ + # # └──────┴────────┘ + def to_integer(base: 10, strict: true) + Utils.wrap_expr(_rbexpr.str_to_integer(base, strict)) + end + # Parse integers with base radix from strings. # # By default base 2. ParseError/Overflows become Nulls. @@ -1128,24 +1189,8 @@ def explode # # │ 2 │ # # │ null │ # # └──────┘ - # - # @example - # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]}) - # df.select(Polars.col("hex").str.parse_int(16, strict: true)) - # # => - # # shape: (4, 1) - # # ┌───────┐ - # # │ hex │ - # # │ --- │ - # # │ i32 │ - # # ╞═══════╡ - # # │ 64030 │ - # # │ 65280 │ - # # │ 51966 │ - # # │ null │ - # # └───────┘ def parse_int(radix = 2, strict: true) - Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict)) + to_integer(base: 2, strict: strict).cast(Int32, strict: strict) end private diff --git a/lib/polars/string_name_space.rb b/lib/polars/string_name_space.rb index 773f843e66..dd376127ba 100644 --- a/lib/polars/string_name_space.rb +++ b/lib/polars/string_name_space.rb @@ -233,9 +233,23 @@ def n_chars # @return [Series] # # @example - # Polars::Series.new([1, nil, 2]).str.concat("-")[0] - # # => "1-null-2" - def concat(delimiter = "-") + # Polars::Series.new([1, nil, 2]).str.concat("-") + # # => + # # shape: (1,) + # # Series: '' [str] + # # [ + # # "1-2" + # # ] + # + # @example + # Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false) + # # => + # # shape: (1,) + # # Series: '' [str] + # # [ + # # null + # # ] + def concat(delimiter = "-", ignore_nulls: true) super end