Skip to content

Commit

Permalink
depr(python,rust!): Deprecate parse_int in favor of to_integer (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Nov 15, 2023
1 parent 3f5b804 commit 55242df
Show file tree
Hide file tree
Showing 14 changed files with 111 additions and 77 deletions.
4 changes: 2 additions & 2 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ list_to_struct = ["polars-plan/list_to_struct"]
python = ["pyo3", "polars-plan/python", "polars-core/python", "polars-io/python"]
row_hash = ["polars-plan/row_hash"]
string_pad = ["polars-plan/string_pad"]
string_from_radix = ["polars-plan/string_from_radix"]
string_to_integer = ["polars-plan/string_to_integer"]
arg_where = ["polars-plan/arg_where"]
search_sorted = ["polars-plan/search_sorted"]
merge_sorted = ["polars-plan/merge_sorted"]
Expand Down Expand Up @@ -178,7 +178,7 @@ test_all = [
"ipc",
"row_hash",
"string_pad",
"string_from_radix",
"string_to_integer",
"search_sorted",
"top_k",
"pivot",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ diff = []
pct_change = ["diff"]
strings = ["polars-core/strings"]
string_pad = ["polars-core/strings"]
string_from_radix = ["polars-core/strings"]
string_to_integer = ["polars-core/strings"]
extract_jsonpath = ["serde_json", "jsonpath_lib", "polars-json"]
log = []
hash = []
Expand Down
17 changes: 8 additions & 9 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use arrow::legacy::kernels::string::*;
use base64::engine::general_purpose;
#[cfg(feature = "string_encoding")]
use base64::Engine as _;
#[cfg(feature = "string_from_radix")]
#[cfg(feature = "string_to_integer")]
use polars_core::export::num::Num;
use polars_core::export::regex::Regex;
use polars_core::prelude::arity::*;
Expand Down Expand Up @@ -60,15 +60,14 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
ca.apply_values(|s| general_purpose::STANDARD.encode(s).into())
}

#[cfg(feature = "string_from_radix")]
// Parse a string number with base _radix_ into a decimal (i32)
fn parse_int(&self, radix: u32, strict: bool) -> PolarsResult<Int32Chunked> {
use arrow::legacy::utils::CustomIterTools;
#[cfg(feature = "string_to_integer")]
// Parse a string number written in the given `base` into an integer (i64)
fn to_integer(&self, base: u32, strict: bool) -> PolarsResult<Int64Chunked> {
let ca = self.as_utf8();
let f = |opt_s: Option<&str>| -> Option<i32> {
opt_s.and_then(|s| <i32 as Num>::from_str_radix(s, radix).ok())
let f = |opt_s: Option<&str>| -> Option<i64> {
opt_s.and_then(|s| <i64 as Num>::from_str_radix(s, base).ok())
};
let out: Int32Chunked = ca.into_iter().map(f).collect_trusted();
let out: Int64Chunked = ca.apply_generic(f);

if strict && ca.null_count() != out.null_count() {
let failure_mask = !ca.is_null() & out.is_null();
Expand All @@ -77,7 +76,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
let some_failures = all_failures.unique()?.slice(0, 10).sort(false);
let some_error_msg = some_failures
.get(0)
.and_then(|s| <i32 as Num>::from_str_radix(s, radix).err())
.and_then(|s| <i64 as Num>::from_str_radix(s, base).err())
.map_or_else(
|| unreachable!("failed to extract ParseIntError"),
|e| format!("{}", e),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ chunked_ids = ["polars-core/chunked_ids"]
list_to_struct = ["polars-ops/list_to_struct"]
row_hash = ["polars-core/row_hash", "polars-ops/hash"]
string_pad = ["polars-ops/string_pad"]
string_from_radix = ["polars-ops/string_from_radix"]
string_to_integer = ["polars-ops/string_to_integer"]
arg_where = []
search_sorted = ["polars-ops/search_sorted"]
merge_sorted = ["polars-ops/merge_sorted"]
Expand Down
22 changes: 11 additions & 11 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ pub enum StringFunction {
dtype: DataType,
pat: String,
},
#[cfg(feature = "string_from_radix")]
FromRadix(u32, bool),
#[cfg(feature = "string_to_integer")]
ToInteger(u32, bool),
LenBytes,
LenChars,
Lowercase,
Expand Down Expand Up @@ -123,8 +123,8 @@ impl StringFunction {
ExtractAll => mapper.with_dtype(DataType::List(Box::new(DataType::Utf8))),
#[cfg(feature = "extract_groups")]
ExtractGroups { dtype, .. } => mapper.with_dtype(dtype.clone()),
#[cfg(feature = "string_from_radix")]
FromRadix { .. } => mapper.with_dtype(DataType::Int32),
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => mapper.with_dtype(DataType::Int64),
#[cfg(feature = "extract_jsonpath")]
JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()),
LenBytes => mapper.with_dtype(DataType::UInt32),
Expand Down Expand Up @@ -189,8 +189,8 @@ impl Display for StringFunction {
ExtractAll => "extract_all",
#[cfg(feature = "extract_groups")]
ExtractGroups { .. } => "extract_groups",
#[cfg(feature = "string_from_radix")]
FromRadix { .. } => "from_radix",
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => "to_integer",
#[cfg(feature = "extract_jsonpath")]
JsonExtract { .. } => "json_extract",
LenBytes => "len_bytes",
Expand Down Expand Up @@ -312,8 +312,8 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
StripCharsEnd => map_as_slice!(strings::strip_chars_end),
StripPrefix => map_as_slice!(strings::strip_prefix),
StripSuffix => map_as_slice!(strings::strip_suffix),
#[cfg(feature = "string_from_radix")]
FromRadix(radix, strict) => map!(strings::from_radix, radix, strict),
#[cfg(feature = "string_to_integer")]
ToInteger(base, strict) => map!(strings::to_integer, base, strict),
Slice(start, length) => map!(strings::str_slice, start, length),
#[cfg(feature = "string_encoding")]
HexEncode => map!(strings::hex_encode),
Expand Down Expand Up @@ -802,10 +802,10 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult<Serie
.map(|ca| ca.into_series())
}

#[cfg(feature = "string_from_radix")]
pub(super) fn from_radix(s: &Series, radix: u32, strict: bool) -> PolarsResult<Series> {
#[cfg(feature = "string_to_integer")]
pub(super) fn to_integer(s: &Series, base: u32, strict: bool) -> PolarsResult<Series> {
let ca = s.utf8()?;
ca.parse_int(radix, strict).map(|ok| ok.into_series())
ca.to_integer(base, strict).map(|ok| ok.into_series())
}
pub(super) fn str_slice(s: &Series, start: i64, length: Option<u64>) -> PolarsResult<Series> {
let ca = s.utf8()?;
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,12 @@ impl StringNameSpace {
.map_private(FunctionExpr::StringExpr(StringFunction::Titlecase))
}

#[cfg(feature = "string_from_radix")]
#[cfg(feature = "string_to_integer")]
/// Parse a string written in the given `base` into an integer.
pub fn from_radix(self, radix: u32, strict: bool) -> Expr {
pub fn to_integer(self, base: u32, strict: bool) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::FromRadix(
radix, strict,
.map_private(FunctionExpr::StringExpr(StringFunction::ToInteger(
base, strict,
)))
}

Expand Down
4 changes: 2 additions & 2 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ list_take = ["polars-ops/list_take", "polars-lazy?/list_take"]
describe = ["polars-core/describe"]
timezones = ["polars-core/timezones", "polars-lazy?/timezones", "polars-io/timezones"]
string_pad = ["polars-lazy?/string_pad", "polars-ops/string_pad"]
string_from_radix = ["polars-lazy?/string_from_radix", "polars-ops/string_from_radix"]
string_to_integer = ["polars-lazy?/string_to_integer", "polars-ops/string_to_integer"]
arg_where = ["polars-lazy?/arg_where"]
search_sorted = ["polars-lazy?/search_sorted"]
merge_sorted = ["polars-lazy?/merge_sorted"]
Expand Down Expand Up @@ -314,7 +314,7 @@ docs-selection = [
"asof_join",
"cross_join",
"concat_str",
"string_from_radix",
"string_to_integer",
"decompress",
"mode",
"take_opt_iter",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@
//! * `temporal` - Conversions between [Chrono](https://docs.rs/chrono/) and Polars for temporal data types
//! * `timezones` - Activate timezone support.
//! * `strings` - Extra string utilities for [`Utf8Chunked`] //! - `string_pad` - `zfill`, `ljust`, `rjust`
//! - `string_from_radix` - `parse_int`
//! - `string_to_integer` - `to_integer`
//! * `object` - Support for generic ChunkedArrays called [`ObjectChunked<T>`] (generic over `T`).
//! These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
//! * Performance related:
Expand Down
2 changes: 1 addition & 1 deletion docs/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ The opt-in features are:
- `timezones` - Activate timezone support.
- `strings` - Extra string utilities for `Utf8Chunked`
- `string_pad` - `pad_start`, `pad_end`, `zfill`
- `string_from_radix` - `parse_int`
- `string_to_integer` - `to_integer`
- `object` - Support for generic ChunkedArrays called `ObjectChunked<T>` (generic over `T`).
These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
- Performance related:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ features = [
"semi_anti_join",
"serde-lazy",
"string_encoding",
"string_from_radix",
"string_to_integer",
"string_pad",
"strings",
"temporal",
Expand Down
54 changes: 34 additions & 20 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@

import polars._reexport as pl
from polars import functions as F
from polars.datatypes import Date, Datetime, Time, py_type_to_dtype
from polars.datatypes import Date, Datetime, Int32, Time, py_type_to_dtype
from polars.exceptions import ChronoFormatWarning
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr
from polars.utils.deprecation import (
deprecate_renamed_function,
deprecate_renamed_parameter,
issue_deprecation_warning,
rename_use_earliest_to_ambiguous,
)
from polars.utils.various import find_stacklevel
Expand Down Expand Up @@ -2030,34 +2029,33 @@ def explode(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_explode())

def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
def to_integer(self, *, base: int = 10, strict: bool = True) -> Expr:
"""
Parse integers with base radix from strings.
ParseError/Overflows become Nulls.
Convert a Utf8 column into an Int64 column, parsing each string in the given base.
Parameters
----------
radix
base
Positive integer which is the base of the string we are parsing.
Default: 10.
strict
Bool, Default=True will raise any ParseError or overflow as ComputeError.
False silently convert to Null.
Returns
-------
Expr
Expression of data type :class:`Int32`.
Expression of data type :class:`Int64`.
Examples
--------
>>> df = pl.DataFrame({"bin": ["110", "101", "010", "invalid"]})
>>> df.with_columns(parsed=pl.col("bin").str.parse_int(2, strict=False))
>>> df.with_columns(parsed=pl.col("bin").str.to_integer(base=2, strict=False))
shape: (4, 2)
┌─────────┬────────┐
│ bin ┆ parsed │
│ --- ┆ --- │
│ str ┆ i32
│ str ┆ i64
╞═════════╪════════╡
│ 110 ┆ 6 │
│ 101 ┆ 5 │
Expand All @@ -2066,12 +2064,12 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
└─────────┴────────┘
>>> df = pl.DataFrame({"hex": ["fa1e", "ff00", "cafe", None]})
>>> df.with_columns(parsed=pl.col("hex").str.parse_int(16, strict=True))
>>> df.with_columns(parsed=pl.col("hex").str.to_integer(base=16, strict=True))
shape: (4, 2)
┌──────┬────────┐
│ hex ┆ parsed │
│ --- ┆ --- │
│ str ┆ i32
│ str ┆ i64
╞══════╪════════╡
│ fa1e ┆ 64030 │
│ ff00 ┆ 65280 │
Expand All @@ -2080,15 +2078,31 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
└──────┴────────┘
"""
if radix is None:
issue_deprecation_warning(
"The default value for the `radix` parameter of `parse_int` will be removed in a future version."
" Call `parse_int(radix=2)` to keep current behavior and silence this warning.",
version="0.19.8",
)
radix = 2
return wrap_expr(self._pyexpr.str_to_integer(base, strict))

@deprecate_renamed_function("to_integer", version="0.19.14")
@deprecate_renamed_parameter("radix", "base", version="0.19.14")
def parse_int(self, base: int | None = None, *, strict: bool = True) -> Expr:
"""
Parse integers with base radix from strings.
ParseError/Overflows become Nulls.
return wrap_expr(self._pyexpr.str_parse_int(radix, strict))
.. deprecated:: 0.19.14
This method has been renamed to :func:`to_integer`.
Parameters
----------
base
Positive integer which is the base of the string we are parsing.
strict
Bool, Default=True will raise any ParseError or overflow as ComputeError.
False silently convert to Null.
"""
if base is None:
base = 2
return self.to_integer(base=base, strict=strict).cast(Int32, strict=strict)

@deprecate_renamed_function("strip_chars", version="0.19.3")
def strip(self, characters: str | None = None) -> Expr:
Expand Down
38 changes: 28 additions & 10 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,31 +1511,30 @@ def explode(self) -> Series:
"""

def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:
def to_integer(self, *, base: int = 10, strict: bool = True) -> Series:
"""
Parse integers with base radix from strings.
ParseError/Overflows become Nulls.
Convert a Utf8 column into an Int64 column, parsing each string in the given base.
Parameters
----------
radix
base
Positive integer which is the base of the string we are parsing.
Default: 10.
strict
Bool, Default=True will raise any ParseError or overflow as ComputeError.
False silently convert to Null.
Returns
-------
Series
Series of data type :class:`Int32`.
Series of data type :class:`Int64`.
Examples
--------
>>> s = pl.Series("bin", ["110", "101", "010", "invalid"])
>>> s.str.parse_int(2, strict=False)
>>> s.str.to_integer(base=2, strict=False)
shape: (4,)
Series: 'bin' [i32]
Series: 'bin' [i64]
[
6
5
Expand All @@ -1544,9 +1543,9 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:
]
>>> s = pl.Series("hex", ["fa1e", "ff00", "cafe", None])
>>> s.str.parse_int(16)
>>> s.str.to_integer(base=16)
shape: (4,)
Series: 'hex' [i32]
Series: 'hex' [i64]
[
64030
65280
Expand All @@ -1556,6 +1555,25 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:
"""

@deprecate_renamed_function("to_integer", version="0.19.14")
@deprecate_renamed_parameter("radix", "base", version="0.19.14")
def parse_int(self, base: int | None = None, *, strict: bool = True) -> Series:
"""
Parse integers with base radix from strings.
.. deprecated:: 0.19.14
This method has been renamed to :func:`to_integer`.
Parameters
----------
base
Positive integer which is the base of the string we are parsing.
strict
Bool, Default=True will raise any ParseError or overflow as ComputeError.
False silently convert to Null.
"""

@deprecate_renamed_function("strip_chars", version="0.19.3")
def strip(self, characters: str | None = None) -> Series:
"""
Expand Down
Loading

0 comments on commit 55242df

Please sign in to comment.