From 42c2c82a7dab0c27a7558890b9457383f343427d Mon Sep 17 00:00:00 2001 From: eakmanrq <6326532+eakmanrq@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:07:06 -0800 Subject: [PATCH] chore: reorganize functions --- sqlframe/base/function_alternatives.py | 609 +++++----- sqlframe/base/functions.py | 1073 ++++++++++++++++- sqlframe/base/session.py | 32 + sqlframe/bigquery/functions.py | 362 +----- sqlframe/bigquery/functions.pyi | 219 +--- sqlframe/bigquery/session.py | 4 + sqlframe/databricks/functions.py | 10 - sqlframe/databricks/functions.pyi | 818 +++++++------ sqlframe/databricks/session.py | 4 + sqlframe/duckdb/functions.py | 40 - sqlframe/duckdb/functions.pyi | 435 +++---- sqlframe/duckdb/session.py | 4 + sqlframe/postgres/functions.py | 61 +- sqlframe/postgres/functions.pyi | 393 +++--- sqlframe/postgres/session.py | 4 + sqlframe/redshift/functions.py | 5 +- sqlframe/redshift/session.py | 4 + sqlframe/snowflake/functions.py | 56 +- sqlframe/snowflake/functions.pyi | 444 +++---- sqlframe/snowflake/session.py | 4 + sqlframe/spark/functions.py | 9 - sqlframe/spark/functions.pyi | 824 +++++++------ sqlframe/spark/session.py | 4 + sqlframe/standalone/functions.py | 2 +- sqlframe/standalone/session.py | 4 + .../integration/engines/test_int_functions.py | 4 +- 26 files changed, 2937 insertions(+), 2491 deletions(-) diff --git a/sqlframe/base/function_alternatives.py b/sqlframe/base/function_alternatives.py index 15671ec..320aff7 100644 --- a/sqlframe/base/function_alternatives.py +++ b/sqlframe/base/function_alternatives.py @@ -58,12 +58,6 @@ def collect_set_from_list_distinct(col: ColumnOrName) -> Column: return collect_list(Column(expression.Distinct(expressions=[Column(col).expression]))) -def first_always_ignore_nulls(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column: - from sqlframe.base.functions import first - - return first(col) - - def to_timestamp_with_time_zone(col: ColumnOrName, format: t.Optional[str] = None) -> Column: from 
sqlframe.base.session import _BaseSession @@ -225,18 +219,6 @@ def nanvl_as_case(col1: ColumnOrName, col2: ColumnOrName) -> Column: return when(~isnan(col1), col(col1)).otherwise(col(col2)) -def percentile_approx_without_accuracy( - col: ColumnOrName, - percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]], - accuracy: t.Optional[float] = None, -) -> Column: - from sqlframe.base.functions import percentile_approx - - if accuracy: - logger.warning("Accuracy is ignored since it is not supported in this dialect") - return percentile_approx(col, percentage) - - def percentile_approx_without_accuracy_and_plural( col: ColumnOrName, percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]], @@ -269,8 +251,6 @@ def percentile_approx_without_accuracy_and_max_array( percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]], accuracy: t.Optional[float] = None, ) -> Column: - from sqlframe.base.functions import percentile_approx - lit = get_func_from_session("lit") array = get_func_from_session("array") col_func = get_func_from_session("col") @@ -283,9 +263,8 @@ def make_approx_percentile(percentage: float) -> expression.Anonymous: if accuracy: logger.warning("Accuracy is ignored since it is not supported in this dialect") - if isinstance(percentage, (list, tuple)): - return array(*[make_approx_percentile(p) for p in percentage]) - return percentile_approx(col, percentage) + + return array(*[make_approx_percentile(p) for p in percentage]) # type: ignore def percentile_without_disc( @@ -311,22 +290,6 @@ def percentile_without_disc( ) -def rand_no_seed(seed: t.Optional[ColumnOrLiteral] = None) -> Column: - from sqlframe.base.functions import rand - - if seed: - logger.warning("Seed is ignored since it is not supported in this dialect") - return rand() - - -def round_cast_as_numeric(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: - from sqlframe.base.functions import round - - col_func = get_func_from_session("col") - - return 
round(col_func(col).cast("numeric"), scale) - - def bround_using_half_even(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: lit_func = get_func_from_session("lit") @@ -428,16 +391,6 @@ def dayofweek_from_extract_with_isodow(col: ColumnOrName) -> Column: ) -def dayofmonth_from_extract(col: ColumnOrName) -> Column: - col_func = get_func_from_session("col") - - return Column( - expression.Extract( - this=expression.Var(this="dayofmonth"), expression=col_func(col).cast("date").expression - ) - ) - - def dayofmonth_from_extract_with_day(col: ColumnOrName) -> Column: col_func = get_func_from_session("col") @@ -512,20 +465,6 @@ def weekofyear_from_extract_as_isoweek(col: ColumnOrName) -> Column: ) -def make_date_casted_as_integer( - year: ColumnOrName, month: ColumnOrName, day: ColumnOrName -) -> Column: - from sqlframe.base.functions import make_date - - col_func = get_func_from_session("col") - - return make_date( - col_func(year).cast("integer"), - col_func(month).cast("integer"), - col_func(day).cast("integer"), - ) - - def make_date_from_date_func(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column: col_func = get_func_from_session("col") @@ -584,29 +523,6 @@ def date_sub_by_date_add( return date_add_func(col, days * lit_func(-1), cast_as_date) -def to_date_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.functions import to_date - - to_timestamp = get_func_from_session("to_timestamp") - - return to_date(to_timestamp(col, format)) - - -def to_date_time_format(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.functions import to_date - from sqlframe.base.session import _BaseSession - - return to_date(col, format=format or _BaseSession().default_time_format) - - -def last_day_with_cast(col: ColumnOrName) -> Column: - from sqlframe.base.functions import last_day - - col_func = get_func_from_session("col") - - return last_day(col_func(col).cast("date")) - - def 
sha1_force_sha1_and_to_hex(col: ColumnOrName) -> Column: col_func = get_func_from_session("col") @@ -637,59 +553,12 @@ def hash_from_farm_fingerprint(*cols: ColumnOrName) -> Column: ) -def date_add_by_multiplication( - col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True -) -> Column: - from sqlframe.base.functions import date_add - - col_func = get_func_from_session("col") - - if isinstance(days, int): - value = date_add(col, days) - else: - value = date_add(col, 1, cast_as_date=False) * col_func(days) - if cast_as_date: - return value.cast("date") - return value - - -def date_sub_by_multiplication( - col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True -) -> Column: - from sqlframe.base.functions import date_sub - - col_func = get_func_from_session("col") - - if isinstance(days, int): - value = date_sub(col, days) - else: - value = date_sub(col, 1, cast_as_date=False) * col_func(days) - if cast_as_date: - return value.cast("date") - return value - - def date_diff_with_subtraction(end: ColumnOrName, start: ColumnOrName) -> Column: col_func = get_func_from_session("col") return col_func(end).cast("date") - col_func(start).cast("date") -def add_months_by_multiplication( - start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True -) -> Column: - from sqlframe.base.functions import add_months - - col_func = get_func_from_session("col") - lit = get_func_from_session("lit") - - multiple_value = lit(months) if isinstance(months, int) else col_func(months) - value = col_func(add_months(start, 1, cast_as_date=False).expression.unnest()) * multiple_value - if cast_as_date: - return value.cast("date") - return value - - def add_months_using_func( start: ColumnOrName, months: t.Union[ColumnOrName, int], cast_as_date: bool = True ) -> Column: @@ -740,23 +609,6 @@ def months_between_from_age_and_extract( ).cast("bigint") -def months_between_cast_as_date_cast_roundoff( - date1: ColumnOrName, date2: 
ColumnOrName, roundOff: t.Optional[bool] = None -) -> Column: - from sqlframe.base.functions import months_between - - col_func = get_func_from_session("col") - - date1 = col_func(date1).cast("date") - date2 = col_func(date2).cast("date") - - value = months_between(date1, date2) - - if roundOff: - return value.cast("bigint") - return value - - def from_unixtime_from_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column: from sqlframe.base.session import _BaseSession @@ -790,7 +642,7 @@ def base64_from_blob(col: ColumnOrLiteral) -> Column: return Column.invoke_expression_over_column(Column(col).cast("blob"), expression.ToBase64) -def bas64_from_encode(col: ColumnOrLiteral) -> Column: +def base64_from_encode(col: ColumnOrLiteral) -> Column: return Column( expression.Encode( this=Column(col).cast("bytea").expression, charset=expression.Literal.string("base64") @@ -955,16 +807,6 @@ def levenshtein_edit_distance( ) -def split_no_limit(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column: - from sqlframe.base.functions import split - - col_func = get_func_from_session("col") - - if limit is not None: - logger.warning("Limit is ignored since it is not supported in this dialect") - return split(col_func(str), pattern) - - def split_from_regex_split_to_array( str: ColumnOrName, pattern: str, limit: t.Optional[int] = None ) -> Column: @@ -997,17 +839,6 @@ def split_with_split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = N ) -def regexp_extract_coalesce_empty_str( - str: ColumnOrName, pattern: str, idx: t.Optional[int] = None -) -> Column: - from sqlframe.base.functions import regexp_extract - - coalesce = get_func_from_session("coalesce") - lit_func = get_func_from_session("lit") - - return coalesce(regexp_extract(str, pattern, idx), lit_func("")) - - def array_contains_any(col: ColumnOrName, value: ColumnOrLiteral) -> Column: lit = get_func_from_session("lit") value_col = value if isinstance(value, Column) else lit(value) 
@@ -1080,54 +911,6 @@ def slice_with_brackets( ) -def array_join_no_null_replacement( - col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None -) -> Column: - from sqlframe.base.functions import array_join - - if null_replacement is None: - logger.warning("Null replacement is ignored since it is not supported in this dialect") - return array_join(col, delimiter) - - -def array_join_null_replacement_with_transform( - col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None -) -> Column: - from sqlframe.base.functions import array_join - - col_func = get_func_from_session("col") - - if null_replacement is None: - return array_join(col, delimiter, null_replacement) - col = Column( - expression.Anonymous( - this="LIST_TRANSFORM", - expressions=[ - col_func(col).expression, - expression.Lambda( - this=expression.Coalesce( - this=expression.Cast( - this=expression.Identifier(this="x"), - to=expression.DataType.build("STRING"), - ), - expressions=[expression.Literal.string(null_replacement)], - ), - expressions=[expression.Identifier(this="x")], - ), - ], - ) - ) - return array_join(col, delimiter) - - -def array_contains_cast_variant(col: ColumnOrName, value: ColumnOrLiteral) -> Column: - from sqlframe.base.functions import array_contains - - lit = get_func_from_session("lit") - value_col = value if isinstance(value, Column) else lit(value) - return array_contains(col, value_col.cast("variant")) - - def arrays_overlap_as_plural(col1: ColumnOrName, col2: ColumnOrName) -> Column: col_func = get_func_from_session("col") @@ -1245,30 +1028,11 @@ def get_json_object_using_arrow_op(col: ColumnOrName, path: str) -> Column: ) -def get_json_object_cast_object(col: ColumnOrName, path: str) -> Column: - from sqlframe.base.functions import get_json_object - - col_func = get_func_from_session("col") - - return get_json_object(col_func(col).cast("variant"), path) - - def get_json_object_using_function(col: ColumnOrName, path: str) -> Column: lit 
= get_func_from_session("lit") return Column.invoke_anonymous_function(col, "GET_JSON_OBJECT", lit(path)) -def create_map_with_cast(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column: - from sqlframe.base.functions import create_map - - col = get_func_from_session("col") - - columns = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols - col1_dtype = col(columns[0]).dtype or "VARCHAR" - col2_dtype = col(columns[1]).dtype or "VARCHAR" - return create_map(*cols).cast(f"MAP({col1_dtype}, {col2_dtype})") - - def array_min_from_sort(col: ColumnOrName) -> Column: element_at = get_func_from_session("element_at") array_sort = get_func_from_session("array_sort") @@ -1468,60 +1232,10 @@ def bit_length_from_length(col: ColumnOrName) -> Column: return Column(expression.Length(this=col_func(col).expression)) * lit(8) -def any_value_always_ignore_nulls( - col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None -) -> Column: - from sqlframe.base.functions import any_value - - if not ignoreNulls: - logger.warning("Nulls are always ignored when using `ANY_VALUE` on this engine") - return any_value(col) - - -def any_value_ignore_nulls_not_supported( - col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None -) -> Column: - from sqlframe.base.functions import any_value - - if ignoreNulls: - logger.warning("Ignoring nulls is not supported in this dialect") - return any_value(col) - - def current_user_from_session_user() -> Column: return Column(expression.Anonymous(this="SESSION_USER")) -def extract_convert_to_var(field: ColumnOrName, source: ColumnOrName) -> Column: - from sqlframe.base.functions import extract - - field = expression.Var(this=Column.ensure_col(field).alias_or_name) # type: ignore - return extract(field, source) # type: ignore - - -def left_cast_len(str: ColumnOrName, len: ColumnOrName) -> Column: - from sqlframe.base.functions import left - - len = Column.ensure_col(len).cast("integer") - return 
left(str, len) - - -def right_cast_len(str: ColumnOrName, len: ColumnOrName) -> Column: - from sqlframe.base.functions import right - - len = Column.ensure_col(len).cast("integer") - return right(str, len) - - -def position_cast_start( - substr: ColumnOrName, str: ColumnOrName, start: t.Optional[ColumnOrName] = None -) -> Column: - from sqlframe.base.functions import position - - start = Column.ensure_col(start).cast("integer") if start else None - return position(substr, str, start) - - def position_as_strpos( substr: ColumnOrName, str: ColumnOrName, start: t.Optional[ColumnOrName] = None ) -> Column: @@ -1540,26 +1254,6 @@ def to_number_using_to_double(col: ColumnOrName, format: ColumnOrName) -> Column return Column.invoke_anonymous_function(col, "TO_DOUBLE", format) -def try_element_at_zero_based(col: ColumnOrName, extraction: ColumnOrName) -> Column: - from sqlframe.base.functions import try_element_at - - lit = get_func_from_session("lit") - index = Column.ensure_col(extraction) - if isinstance(index.expression, expression.Literal) and index.expression.is_number: - index = index - lit(1) - return try_element_at(col, index) - - -def to_unix_timestamp_include_default_format( - timestamp: ColumnOrName, - format: t.Optional[ColumnOrName] = None, -) -> Column: - from sqlframe.base.functions import to_unix_timestamp - from sqlframe.base.session import _BaseSession - - return to_unix_timestamp(timestamp, format or _BaseSession().default_time_format) - - def array_append_list_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column: lit = get_func_from_session("lit") value = value if isinstance(value, Column) else lit(value) @@ -1641,6 +1335,14 @@ def typeof_from_variant(col: ColumnOrName) -> Column: return Column.invoke_anonymous_function(col, "TYPEOF") +def typeof_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + this="bqutil.fn.typeof", expressions=[Column.ensure_col(col).expression] + ) + ) + + def regexp_replace_global_option( 
str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None ) -> Column: @@ -1664,6 +1366,295 @@ def regexp_replace_global_option( ) +def degrees_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + this="bqutil.fn.degrees", expressions=[Column.ensure_col(col).expression] + ) + ) + + +def radians_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + this="bqutil.fn.radians", expressions=[Column.ensure_col(col).expression] + ) + ) + + +def bround_bgutil(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: + from sqlframe.base.session import _BaseSession + + lit = get_func_from_session("lit", _BaseSession()) + + expressions = [Column.ensure_col(col).cast("bignumeric").expression] + if scale is not None: + expressions.append(lit(scale).expression) + return Column( + expression.Anonymous( + this="bqutil.fn.cw_round_half_even", + expressions=expressions, + ) + ) + + +def months_between_bgutils( + date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None +) -> Column: + roundOff = True if roundOff is None else roundOff + round = get_func_from_session("round") + lit = get_func_from_session("lit") + + value = Column( + expression.Anonymous( + this="bqutil.fn.cw_months_between", + expressions=[ + Column.ensure_col(date1).cast("datetime").expression, + Column.ensure_col(date2).cast("datetime").expression, + ], + ) + ) + if roundOff: + value = round(value, lit(8)) + return value + + +def next_day_bgutil(col: ColumnOrName, dayOfWeek: str) -> Column: + lit = get_func_from_session("lit") + + return Column( + expression.Anonymous( + this="bqutil.fn.cw_next_day", + expressions=[Column.ensure_col(col).cast("date").expression, lit(dayOfWeek).expression], + ) + ) + + +def from_unixtime_bigutil(col: ColumnOrName, format: t.Optional[str] = None) -> Column: + from sqlframe.base.session import _BaseSession + + session: _BaseSession = _BaseSession() + + expressions = 
[Column.ensure_col(col).expression] + return Column( + expression.Anonymous( + this="FORMAT_TIMESTAMP", + expressions=[ + session.format_time(format), + Column( + expression.Anonymous(this="TIMESTAMP_SECONDS", expressions=expressions) + ).expression, + ], + ) + ) + + +def unix_timestamp_bgutil( + timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None +) -> Column: + from sqlframe.base.session import _BaseSession + + lit = get_func_from_session("lit") + return Column( + expression.Anonymous( + this="UNIX_SECONDS", + expressions=[ + expression.Anonymous( + this="PARSE_TIMESTAMP", + expressions=[ + _BaseSession().format_time(format), + Column.ensure_col(timestamp).expression, + lit("UTC").expression, + ], + ) + ], + ) + ) + + +def format_number_bgutil(col: ColumnOrName, d: int) -> Column: + round = get_func_from_session("round") + lit = get_func_from_session("lit") + + return Column( + expression.Anonymous( + this="FORMAT", + expressions=[ + lit(f"%'.{d}f").expression, + round(Column.ensure_col(col).cast("float"), d).expression, + ], + ) + ) + + +def substring_index_bgutil(str: ColumnOrName, delim: str, count: int) -> Column: + lit = get_func_from_session("lit") + + return Column( + expression.Anonymous( + this="bqutil.fn.cw_substring_index", + expressions=[ + Column.ensure_col(str).expression, + lit(delim).expression, + lit(count).expression, + ], + ) + ) + + +def bin_bgutil(col: ColumnOrName) -> Column: + return ( + Column( + expression.Anonymous( + this="bqutil.fn.to_binary", + expressions=[Column.ensure_col(col).expression], + ) + ) + .cast("int") + .cast("string") + ) + + +def slice_bgutil( + x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int] +) -> Column: + lit = get_func_from_session("lit") + + start_col = start if isinstance(start, Column) else lit(start) + length_col = length if isinstance(length, Column) else lit(length) + + subquery = ( + expression.select( + expression.column("x"), + ) + .from_( + 
expression.Unnest( + expressions=[Column.ensure_col(x).expression], + alias=expression.TableAlias( + columns=[expression.to_identifier("x")], + ), + offset=expression.to_identifier("offset"), + ) + ) + .where( + expression.Between( + this=expression.column("offset"), + low=(start_col - lit(1)).expression, + high=(start_col + length_col).expression, + ) + ) + ) + + return Column( + expression.Anonymous( + this="ARRAY", + expressions=[subquery], + ) + ) + + +def array_position_bgutil(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + lit = get_func_from_session("lit") + + value_col = value if isinstance(value, Column) else lit(value) + + return Column( + expression.Coalesce( + this=expression.Anonymous( + this="bqutil.fn.find_in_set", + expressions=[ + value_col.expression, + expression.Anonymous( + this="ARRAY_TO_STRING", + expressions=[Column.ensure_col(col).expression, lit(",").expression], + ), + ], + ), + expressions=[lit(0).expression], + ) + ) + + +def array_remove_bgutil(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + lit = get_func_from_session("lit") + + value_col = value if isinstance(value, Column) else lit(value) + + filter_subquery = expression.select( + "*", + ).from_( + expression.Unnest( + expressions=[Column.ensure_col(col).expression], + alias=expression.TableAlias( + columns=[expression.to_identifier("x")], + ), + ) + ) + + agg_subquery = ( + expression.select( + expression.Anonymous( + this="ARRAY_AGG", + expressions=[expression.column("x")], + ), + ) + .from_(filter_subquery.subquery("t")) + .where( + expression.NEQ( + this=expression.column("x", "t"), + expression=value_col.expression, + ) + ) + ) + + return Column(agg_subquery.subquery()) + + +def array_distinct_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + this="bqutil.fn.cw_array_distinct", + expressions=[Column.ensure_col(col).expression], + ) + ) + + +def array_min_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + 
this="bqutil.fn.cw_array_min", + expressions=[Column.ensure_col(col).expression], + ) + ) + + +def array_max_bgutil(col: ColumnOrName) -> Column: + return Column( + expression.Anonymous( + this="bqutil.fn.cw_array_max", + expressions=[Column.ensure_col(col).expression], + ) + ) + + +def sort_array_bgutil(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column: + order = "ASC" if asc or asc is None else "DESC" + subquery = ( + expression.select("x") + .from_( + expression.Unnest( + expressions=[Column.ensure_col(col).expression], + alias=expression.TableAlias( + columns=[expression.to_identifier("x")], + ), + ) + ) + .order_by(f"x {order}") + ) + + return Column(expression.Anonymous(this="ARRAY", expressions=[subquery])) + + def _is_string_using_typeof_varchar(col: ColumnOrName) -> Column: typeof = get_func_from_session("typeof") lit = get_func_from_session("lit") diff --git a/sqlframe/base/functions.py b/sqlframe/base/functions.py index 9d8a8ad..0890ea3 100644 --- a/sqlframe/base/functions.py +++ b/sqlframe/base/functions.py @@ -21,12 +21,18 @@ from pyspark.sql.session import SparkContext from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName - from sqlframe.base.session import DF + from sqlframe.base.session import DF, _BaseSession from sqlframe.base.types import ArrayType, StructType logger = logging.getLogger(__name__) +def _get_session() -> _BaseSession: + from sqlframe.base.session import _BaseSession + + return _BaseSession() + + @meta() def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column: from sqlframe.base.session import _BaseSession @@ -237,6 +243,19 @@ def csc(col: ColumnOrName) -> Column: @meta() def e() -> Column: + from sqlframe.base.function_alternatives import e_literal + + session = _get_session() + + if ( + session._is_bigquery + or session._is_duckdb + or session._is_postgres + or session._is_redshift + or session._is_snowflake + ): + return e_literal() + return Column(expression.Anonymous(this="e")) @@ -247,11 +266,31 @@ def 
exp(col: ColumnOrName) -> Column: @meta() def expm1(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import expm1_from_exp + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return expm1_from_exp(col) + return Column.invoke_anonymous_function(col, "EXPM1") @meta() def factorial(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + factorial_ensure_int, + factorial_from_case_statement, + ) + + session = _get_session() + + if session._is_duckdb: + return factorial_ensure_int(col) + + if session._is_bigquery: + return factorial_from_case_statement(col) + return Column.invoke_anonymous_function(col, "FACTORIAL") @@ -267,6 +306,13 @@ def log10(col: ColumnOrName) -> Column: @meta() def log1p(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import log1p_from_log + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return log1p_from_log(col) + return Column.invoke_anonymous_function(col, "LOG1P") @@ -286,6 +332,13 @@ def log(arg1: t.Union[ColumnOrName, float], arg2: t.Optional[ColumnOrName] = Non @meta() def rint(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import rint_from_round + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return rint_from_round(col) + return Column.invoke_anonymous_function(col, "RINT") @@ -321,6 +374,13 @@ def tanh(col: ColumnOrName) -> Column: @meta() def degrees(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import degrees_bgutil + + session = _get_session() + + if session._is_bigquery: + return degrees_bgutil(col) + return Column.invoke_anonymous_function(col, "DEGREES") @@ -329,6 +389,13 @@ def degrees(col: ColumnOrName) -> Column: @meta() def radians(col: ColumnOrName) -> Column: 
+ from sqlframe.base.function_alternatives import radians_bgutil + + session = _get_session() + + if session._is_bigquery: + return radians_bgutil(col) + return Column.invoke_anonymous_function(col, "RADIANS") @@ -342,6 +409,13 @@ def bitwiseNOT(col: ColumnOrName) -> Column: @meta() def bitwise_not(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import bitwise_not_from_bitnot + + session = _get_session() + + if session._is_snowflake: + return bitwise_not_from_bitnot(col) + return Column.invoke_expression_over_column(col, expression.BitwiseNot) @@ -397,11 +471,25 @@ def var_pop(col: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "postgres"]) def skewness(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import skewness_from_skew + + session = _get_session() + + if session._is_snowflake: + return skewness_from_skew(col) + return Column.invoke_anonymous_function(col, "SKEWNESS") @meta(unsupported_engines=["bigquery", "postgres"]) def kurtosis(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import kurtosis_from_kurtosis_pop + + session = _get_session() + + if session._is_duckdb: + return kurtosis_from_kurtosis_pop(col) + return Column.invoke_anonymous_function(col, "KURTOSIS") @@ -412,6 +500,13 @@ def collect_list(col: ColumnOrName) -> Column: @meta() def collect_set(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import collect_set_from_list_distinct + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres: + return collect_set_from_list_distinct(col) + return Column.invoke_expression_over_column(col, expression.ArrayUniqueAgg) @@ -495,6 +590,11 @@ def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "postgres", "snowflake"]) def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column: + session = _get_session() + + if session._is_duckdb: + 
ignorenulls = None + this = Column.invoke_expression_over_column(col, expression.First) if ignorenulls: return Column.invoke_expression_over_column(this, expression.IgnoreNulls) @@ -519,11 +619,25 @@ def input_file_name() -> Column: @meta() def isnan(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import isnan_using_equal + + session = _get_session() + + if session._is_postgres or session._is_snowflake: + return isnan_using_equal(col) + return Column.invoke_expression_over_column(col, expression.IsNan) @meta() def isnull(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import isnull_using_equal + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return isnull_using_equal(col) + return Column.invoke_anonymous_function(col, "ISNULL") @@ -542,6 +656,13 @@ def monotonically_increasing_id() -> Column: @meta() def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import nanvl_as_case + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return nanvl_as_case(col1, col2) + return Column.invoke_anonymous_function(col1, "NANVL", col2) @@ -551,6 +672,24 @@ def percentile_approx( percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]], accuracy: t.Optional[t.Union[ColumnOrLiteral, int]] = None, ) -> Column: + from sqlframe.base.function_alternatives import ( + percentile_approx_without_accuracy_and_max_array, + percentile_approx_without_accuracy_and_plural, + ) + + session = _get_session() + + if session._is_bigquery: + return percentile_approx_without_accuracy_and_plural(col, percentage, accuracy) # type: ignore + + if session._is_duckdb: + if accuracy: + logger.warning("Accuracy is ignored since it is not supported in this dialect") + accuracy = None + + if session._is_snowflake and isinstance(percentage, (list, 
tuple)): + return percentile_approx_without_accuracy_and_max_array(col, percentage, accuracy) # type: ignore + if accuracy: return Column.invoke_expression_over_column( col, expression.ApproxQuantile, quantile=lit(percentage), accuracy=accuracy @@ -566,6 +705,13 @@ def percentile( percentage: t.Union[ColumnOrLiteral, t.List[float], t.Tuple[float]], frequency: t.Optional[ColumnOrLiteral] = None, ) -> Column: + from sqlframe.base.function_alternatives import percentile_without_disc + + session = _get_session() + + if session._is_databricks or session._is_spark: + return percentile_without_disc(col, percentage, frequency) + if frequency: logger.warning("Frequency is not supported in all engines") return Column.invoke_expression_over_column( @@ -575,6 +721,13 @@ def percentile( @meta() def rand(seed: t.Optional[int] = None) -> Column: + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres: + if seed: + logger.warning("Seed is ignored since it is not supported in this dialect") + seed = None + if seed is not None: return Column.invoke_expression_over_column(None, expression.Rand, this=lit(seed)) return Column.invoke_expression_over_column(None, expression.Rand) @@ -589,6 +742,11 @@ def randn(seed: t.Optional[int] = None) -> Column: @meta() def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: + session = _get_session() + + if session._is_postgres: + col = Column.ensure_col(col).cast("numeric") + if scale is not None: return Column.invoke_expression_over_column(col, expression.Round, decimals=scale) return Column.invoke_expression_over_column(col, expression.Round) @@ -596,6 +754,19 @@ def round(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: @meta(unsupported_engines=["duckdb", "postgres"]) def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: + from sqlframe.base.function_alternatives import ( + bround_bgutil, + bround_using_half_even, + ) + + session = _get_session() + + if 
session._is_bigquery: + return bround_bgutil(col, scale) + + if session._is_snowflake: + return bround_using_half_even(col, scale) + if scale is not None: return Column.invoke_anonymous_function(col, "BROUND", lit(scale)) return Column.invoke_anonymous_function(col, "BROUND") @@ -603,6 +774,13 @@ def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: @meta() def shiftleft(col: ColumnOrName, numBits: int) -> Column: + from sqlframe.base.function_alternatives import shiftleft_from_bitshiftleft + + session = _get_session() + + if session._is_snowflake: + return shiftleft_from_bitshiftleft(col, numBits) + return Column.invoke_expression_over_column( col, expression.BitwiseLeftShift, expression=lit(numBits) ) @@ -613,6 +791,13 @@ def shiftleft(col: ColumnOrName, numBits: int) -> Column: @meta() def shiftright(col: ColumnOrName, numBits: int) -> Column: + from sqlframe.base.function_alternatives import shiftright_from_bitshiftright + + session = _get_session() + + if session._is_snowflake: + return shiftright_from_bitshiftright(col, numBits) + return Column.invoke_expression_over_column( col, expression.BitwiseRightShift, expression=lit(numBits) ) @@ -638,6 +823,13 @@ def expr(str: str) -> Column: @meta(unsupported_engines=["postgres"]) def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import struct_with_eq + + session = _get_session() + + if session._is_snowflake: + return struct_with_eq(col, *cols) + columns = ensure_list(col) + list(cols) return Column.invoke_expression_over_column(None, expression.Struct, expressions=columns) @@ -707,6 +899,13 @@ def date_format(col: ColumnOrName, format: str) -> Column: @meta() def year(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import year_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return year_from_extract(col) + return 
Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.Year ) @@ -714,6 +913,13 @@ def year(col: ColumnOrName) -> Column: @meta() def quarter(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import quarter_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return quarter_from_extract(col) + return Column( expression.Anonymous( this="QUARTER", @@ -724,6 +930,13 @@ def quarter(col: ColumnOrName) -> Column: @meta() def month(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import month_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return month_from_extract(col) + return Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.Month ) @@ -731,6 +944,19 @@ def month(col: ColumnOrName) -> Column: @meta() def dayofweek(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + dayofweek_from_extract, + dayofweek_from_extract_with_isodow, + ) + + session = _get_session() + + if session._is_bigquery: + return dayofweek_from_extract(col) + + if session._is_postgres: + return dayofweek_from_extract_with_isodow(col) + return Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.DayOfWeek, @@ -739,6 +965,13 @@ def dayofweek(col: ColumnOrName) -> Column: @meta() def dayofmonth(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import dayofmonth_from_extract_with_day + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return dayofmonth_from_extract_with_day(col) + return Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.DayOfMonth, @@ -747,6 +980,19 @@ def dayofmonth(col: ColumnOrName) -> 
Column: @meta() def dayofyear(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + dayofyear_from_extract, + dayofyear_from_extract_doy, + ) + + session = _get_session() + + if session._is_bigquery: + return dayofyear_from_extract(col) + + if session._is_postgres: + return dayofyear_from_extract_doy(col) + return Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.DayOfYear, @@ -755,21 +1001,55 @@ def dayofyear(col: ColumnOrName) -> Column: @meta() def hour(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import hour_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return hour_from_extract(col) + return Column.invoke_anonymous_function(col, "HOUR") @meta() def minute(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import minute_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return minute_from_extract(col) + return Column.invoke_anonymous_function(col, "MINUTE") @meta() def second(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import second_from_extract + + session = _get_session() + + if session._is_bigquery or session._is_postgres: + return second_from_extract(col) + return Column.invoke_anonymous_function(col, "SECOND") @meta() def weekofyear(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + weekofyear_from_extract_as_isoweek, + weekofyear_from_extract_as_week, + ) + + session = _get_session() + + if session._is_bigquery: + return weekofyear_from_extract_as_isoweek(col) + + if session._is_postgres: + return weekofyear_from_extract_as_week(col) + return Column.invoke_expression_over_column( Column(expression.TsOrDsToDate(this=Column.ensure_col(col).expression)), expression.WeekOfYear, @@ -778,6 +1058,24 @@ def weekofyear(col: ColumnOrName) -> Column: @meta() def 
make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + make_date_date_from_parts, + make_date_from_date_func, + ) + + session = _get_session() + + if session._is_bigquery: + return make_date_from_date_func(year, month, day) + + if session._is_postgres: + year = Column.ensure_col(year).cast("integer") + month = Column.ensure_col(month).cast("integer") + day = Column.ensure_col(day).cast("integer") + + if session._is_snowflake: + return make_date_date_from_parts(year, month, day) + return Column.invoke_anonymous_function(year, "MAKE_DATE", month, day) @@ -785,9 +1083,22 @@ def make_date(year: ColumnOrName, month: ColumnOrName, day: ColumnOrName) -> Col def date_add( col: ColumnOrName, days: t.Union[ColumnOrName, int], cast_as_date: bool = True ) -> Column: + from sqlframe.base.function_alternatives import date_add_no_date_sub + + session = _get_session() + date_sub_func = get_func_from_session("date_sub") + original_days = None + + if session._is_postgres and not isinstance(days, int): + original_days = days + days = 1 + + if session._is_snowflake: + return date_add_no_date_sub(col, days, cast_as_date) + if isinstance(days, int): if days < 0: - return date_sub(col, days * -1) + return date_sub_func(col, days * -1) days = lit(days) result = Column.invoke_expression_over_column( Column.ensure_col(col).cast("date"), @@ -795,6 +1106,10 @@ def date_add( expression=days, unit=expression.Var(this="DAY"), ) + + if session._is_postgres and original_days is not None: + result = result * Column.ensure_col(original_days) + if cast_as_date: return result.cast("date") return result @@ -807,9 +1122,22 @@ def date_sub( """ Non-standard argument: cast_as_date """ + from sqlframe.base.function_alternatives import date_sub_by_date_add + + session = _get_session() + date_add_func = get_func_from_session("date_add") + original_days = None + + if session._is_postgres and not isinstance(days, int): + 
original_days = days + days = 1 + + if session._is_snowflake: + return date_sub_by_date_add(col, days, cast_as_date) + if isinstance(days, int): if days < 0: - return date_add(col, days * -1) + return date_add_func(col, days * -1) days = lit(days) result = Column.invoke_expression_over_column( Column.ensure_col(col).cast("date"), @@ -817,6 +1145,10 @@ def date_sub( expression=days, unit=expression.Var(this="DAY"), ) + + if session._is_postgres and original_days is not None: + result = result * Column.ensure_col(original_days) + if cast_as_date: return result.cast("date") return result @@ -824,6 +1156,13 @@ def date_sub( @meta() def date_diff(end: ColumnOrName, start: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import date_diff_with_subtraction + + session = _get_session() + + if session._is_postgres: + return date_diff_with_subtraction(end, start) + return Column.invoke_expression_over_column( Column.ensure_col(end).cast("date"), expression.DateDiff, @@ -838,6 +1177,18 @@ def add_months( """ Non-standard argument: cast_as_date """ + from sqlframe.base.function_alternatives import add_months_using_func + + lit = get_func_from_session("lit") + session = _get_session() + original_months = months + + if session._is_databricks or session._is_postgres or session._is_spark: + months = 1 + + if session._is_snowflake: + return add_months_using_func(start, months, cast_as_date) + start_col = Column(start).cast("date") if isinstance(months, int): @@ -860,6 +1211,15 @@ def add_months( ) ) result = start_col + end_col + + if session._is_databricks or session._is_postgres or session._is_spark: + multiple_value = ( + lit(original_months) + if isinstance(original_months, int) + else Column.ensure_col(original_months) + ) + result = Column.ensure_col(result.expression.unnest()) * multiple_value + if cast_as_date: return result.cast("date") return result @@ -869,35 +1229,79 @@ def add_months( def months_between( date1: ColumnOrName, date2: ColumnOrName, 
roundOff: t.Optional[bool] = None ) -> Column: + from sqlframe.base.function_alternatives import ( + months_between_bgutils, + months_between_from_age_and_extract, + ) + + session = _get_session() + original_roundoff = roundOff + + if session._is_bigquery: + return months_between_bgutils(date1, date2, roundOff) + + if session._is_postgres: + return months_between_from_age_and_extract(date1, date2, roundOff) + + if session._is_snowflake: + date1 = Column.ensure_col(date1).cast("date") + date2 = Column.ensure_col(date2).cast("date") + roundOff = None + if roundOff is None: - return Column.invoke_expression_over_column( + result = Column.invoke_expression_over_column( date1, expression.MonthsBetween, expression=date2 ) + else: + result = Column.invoke_expression_over_column( + date1, expression.MonthsBetween, expression=date2, roundoff=lit(roundOff) + ) - return Column.invoke_expression_over_column( - date1, expression.MonthsBetween, expression=date2, roundoff=lit(roundOff) - ) + if session._is_snowflake and original_roundoff: + return result.cast("bigint") + return result @meta() def to_date(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.session import _BaseSession + session = _get_session() + + if session._is_bigquery: + to_timestamp_func = get_func_from_session("to_timestamp") + col = to_timestamp_func(col, format) + + if session._is_snowflake: + format = format or session.default_time_format - # format = lit(format or spark_default_date_format()) if format is not None: return Column.invoke_expression_over_column( - col, expression.TsOrDsToDate, format=_BaseSession().format_time(format) + col, expression.TsOrDsToDate, format=session.format_time(format) ) return Column.invoke_expression_over_column(col, expression.TsOrDsToDate) @meta() def to_timestamp(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.session import _BaseSession + from sqlframe.base.function_alternatives import ( + 
to_timestamp_just_timestamp, + to_timestamp_tz, + to_timestamp_with_time_zone, + ) + + session = _get_session() + + if session._is_duckdb: + return to_timestamp_tz(col, format) + + if session._is_bigquery: + return to_timestamp_just_timestamp(col, format) + + if session._is_postgres: + return to_timestamp_with_time_zone(col, format) if format is not None: return Column.invoke_expression_over_column( - col, expression.StrToTime, format=_BaseSession().format_time(format) + col, expression.StrToTime, format=session.format_time(format) ) return Column.ensure_col(col).cast("timestampltz") @@ -919,22 +1323,45 @@ def date_trunc(format: str, timestamp: ColumnOrName) -> Column: @meta(unsupported_engines=["duckdb", "postgres"]) def next_day(col: ColumnOrName, dayOfWeek: str) -> Column: + from sqlframe.base.function_alternatives import next_day_bgutil + + session = _get_session() + + if session._is_bigquery: + return next_day_bgutil(col, dayOfWeek) + return Column.invoke_anonymous_function(col, "NEXT_DAY", lit(dayOfWeek)) @meta() def last_day(col: ColumnOrName) -> Column: + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + col = Column.ensure_col(col).cast("date") + return Column.invoke_expression_over_column(col, expression.LastDay) @meta() def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.session import _BaseSession + from sqlframe.base.function_alternatives import ( + from_unixtime_bigutil, + from_unixtime_from_timestamp, + ) + + session = _get_session() + + if session._is_bigquery: + return from_unixtime_bigutil(col, format) + + if session._is_postgres or session._is_snowflake: + return from_unixtime_from_timestamp(col, format) return Column.invoke_expression_over_column( col, expression.UnixToStr, - format=_BaseSession().format_time(format), + format=session.format_time(format), ) @@ -942,12 +1369,23 @@ def from_unixtime(col: ColumnOrName, 
format: t.Optional[str] = None) -> Column: def unix_timestamp( timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None ) -> Column: - from sqlframe.base.session import _BaseSession + from sqlframe.base.function_alternatives import ( + unix_timestamp_bgutil, + unix_timestamp_from_extract, + ) + + session = _get_session() + + if session._is_bigquery: + return unix_timestamp_bgutil(timestamp, format) + + if session._is_postgres or session._is_snowflake: + return unix_timestamp_from_extract(timestamp, format) return Column.invoke_expression_over_column( timestamp, expression.StrToUnix, - format=_BaseSession().format_time(format), + format=session.format_time(format), ).cast("bigint") @@ -1010,6 +1448,13 @@ def md5(col: ColumnOrName) -> Column: @meta(unsupported_engines=["duckdb", "postgres"]) def sha1(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import sha1_force_sha1_and_to_hex + + session = _get_session() + + if session._is_bigquery: + return sha1_force_sha1_and_to_hex(col) + return Column.invoke_expression_over_column(col, expression.SHA) @@ -1020,6 +1465,13 @@ def sha2(col: ColumnOrName, numBits: int) -> Column: @meta(unsupported_engines=["postgres"]) def hash(*cols: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import hash_from_farm_fingerprint + + session = _get_session() + + if session._is_bigquery: + return hash_from_farm_fingerprint(*cols) + args = cols[1:] if len(cols) > 1 else [] return Column.invoke_anonymous_function(cols[0], "HASH", *args) @@ -1061,11 +1513,41 @@ def ascii(col: ColumnOrName) -> Column: @meta() def base64(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + base64_from_base64_encode, + base64_from_blob, + base64_from_encode, + ) + + session = _get_session() + + if session._is_bigquery or session._is_duckdb: + return base64_from_blob(col) + + if session._is_postgres: + return base64_from_encode(col) + + if session._is_snowflake: + return 
base64_from_base64_encode(col) + return Column.invoke_expression_over_column(col, expression.ToBase64) @meta() def unbase64(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + unbase64_from_base64_decode_string, + unbase64_from_decode, + ) + + session = _get_session() + + if session._is_postgres: + return unbase64_from_decode(col) + + if session._is_snowflake: + return unbase64_from_base64_decode_string(col) + return Column.invoke_expression_over_column(col, expression.FromBase64) @@ -1086,6 +1568,13 @@ def trim(col: ColumnOrName) -> Column: @meta() def concat_ws(sep: str, *cols: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import concat_ws_from_array_to_string + + session = _get_session() + + if session._is_bigquery: + return concat_ws_from_array_to_string(sep, *cols) + return Column.invoke_expression_over_column( None, expression.ConcatWs, expressions=[lit(sep)] + list(cols) ) @@ -1093,6 +1582,19 @@ def concat_ws(sep: str, *cols: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "snowflake"]) def decode(col: ColumnOrName, charset: str) -> Column: + from sqlframe.base.function_alternatives import ( + decode_from_blob, + decode_from_convert_from, + ) + + session = _get_session() + + if session._is_duckdb: + return decode_from_blob(col, charset) + + if session._is_postgres: + return decode_from_convert_from(col, charset) + return Column.invoke_expression_over_column( col, expression.Decode, charset=expression.Literal.string(charset) ) @@ -1100,6 +1602,13 @@ def decode(col: ColumnOrName, charset: str) -> Column: @meta(unsupported_engines=["bigquery", "snowflake"]) def encode(col: ColumnOrName, charset: str) -> Column: + from sqlframe.base.function_alternatives import encode_from_convert_to + + session = _get_session() + + if session._is_postgres: + return encode_from_convert_to(col, charset) + return Column.invoke_expression_over_column( col, expression.Encode, 
charset=expression.Literal.string(charset) ) @@ -1107,11 +1616,37 @@ def encode(col: ColumnOrName, charset: str) -> Column: @meta(unsupported_engines="duckdb") def format_number(col: ColumnOrName, d: int) -> Column: + from sqlframe.base.function_alternatives import ( + format_number_bgutil, + format_number_from_to_char, + ) + + session = _get_session() + + if session._is_bigquery: + return format_number_bgutil(col, d) + + if session._is_postgres or session._is_snowflake: + return format_number_from_to_char(col, d) + return Column.invoke_anonymous_function(col, "FORMAT_NUMBER", lit(d)) @meta(unsupported_engines="snowflake") def format_string(format: str, *cols: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + format_string_with_format, + format_string_with_pipes, + ) + + session = _get_session() + + if session._is_duckdb: + return format_string_with_pipes(format, *cols) + + if session._is_bigquery or session._is_postgres: + return format_string_with_format(format, *cols) + format_col = lit(format) columns = [Column.ensure_col(x) for x in cols] return Column.invoke_anonymous_function(format_col, "FORMAT_STRING", *columns) @@ -1119,6 +1654,13 @@ def format_string(format: str, *cols: ColumnOrName) -> Column: @meta() def instr(col: ColumnOrName, substr: str) -> Column: + from sqlframe.base.function_alternatives import instr_using_strpos + + session = _get_session() + + if session._is_bigquery: + return instr_using_strpos(col, substr) + return Column.invoke_expression_over_column(col, expression.StrPosition, substr=lit(substr)) @@ -1129,6 +1671,12 @@ def overlay( pos: t.Union[ColumnOrName, int], len: t.Optional[t.Union[ColumnOrName, int]] = None, ) -> Column: + from sqlframe.base.function_alternatives import overlay_from_substr + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_snowflake: + return overlay_from_substr(src, replace, pos, len) return Column.invoke_expression_over_column( src, 
expression.Overlay, @@ -1162,6 +1710,13 @@ def substring(str: ColumnOrName, pos: int, len: int) -> Column: @meta(unsupported_engines=["duckdb", "postgres", "snowflake"]) def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: + from sqlframe.base.function_alternatives import substring_index_bgutil + + session = _get_session() + + if session._is_bigquery: + return substring_index_bgutil(str, delim, count) + return Column.invoke_anonymous_function(str, "SUBSTRING_INDEX", lit(delim), lit(count)) @@ -1169,6 +1724,13 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: def levenshtein( left: ColumnOrName, right: ColumnOrName, threshold: t.Optional[int] = None ) -> Column: + from sqlframe.base.function_alternatives import levenshtein_edit_distance + + session = _get_session() + + if session._is_snowflake: + return levenshtein_edit_distance(left, right, threshold) + value: t.Union[expression.Case, expression.Levenshtein] = expression.Levenshtein( this=Column.ensure_col(left).expression, expression=Column.ensure_col(right).expression, @@ -1225,6 +1787,24 @@ def repeat(col: ColumnOrName, n: int) -> Column: @meta() def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Column: + from sqlframe.base.function_alternatives import ( + split_from_regex_split_to_array, + split_with_split, + ) + + session = _get_session() + + if session._is_duckdb: + if limit is not None: + logger.warning("Limit is ignored since it is not supported in this dialect") + limit = None + + if session._is_bigquery or session._is_snowflake: + return split_with_split(str, pattern, limit) + + if session._is_postgres: + return split_from_regex_split_to_array(str, pattern, limit) + if limit is not None: return Column.invoke_expression_over_column( str, expression.RegexpSplit, expression=lit(pattern), limit=lit(limit) @@ -1236,22 +1816,39 @@ def split(str: ColumnOrName, pattern: str, limit: t.Optional[int] = None) -> Col 
@meta(unsupported_engines="postgres") def regexp_extract(str: ColumnOrName, pattern: str, idx: t.Optional[int] = None) -> Column: + session = _get_session() + if idx is not None: - return Column.invoke_expression_over_column( + result = Column.invoke_expression_over_column( str, expression.RegexpExtract, expression=lit(pattern), group=lit(idx), ) - return Column.invoke_expression_over_column( - str, expression.RegexpExtract, expression=lit(pattern) - ) + else: + result = Column.invoke_expression_over_column( + str, expression.RegexpExtract, expression=lit(pattern) + ) + + if session._is_snowflake: + coalesce_func = get_func_from_session("coalesce") + + result = coalesce_func(result, lit("")) + + return result @meta() def regexp_replace( str: ColumnOrName, pattern: str, replacement: str, position: t.Optional[int] = None ) -> Column: + from sqlframe.base.function_alternatives import regexp_replace_global_option + + session = _get_session() + + if session._is_duckdb or session._is_postgres: + return regexp_replace_global_option(str, pattern, replacement, position) + if position is not None: return Column.invoke_expression_over_column( str, @@ -1280,16 +1877,43 @@ def soundex(col: ColumnOrName) -> Column: @meta(unsupported_engines=["postgres", "snowflake"]) def bin(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import bin_bgutil + + session = _get_session() + + if session._is_bigquery: + return bin_bgutil(col) + return Column.invoke_anonymous_function(col, "BIN") @meta(unsupported_engines="postgres") def hex(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + hex_casted_as_bytes, + hex_using_encode, + ) + + session = _get_session() + + if session._is_bigquery: + return hex_casted_as_bytes(col) + + if session._is_snowflake: + return hex_using_encode(col) + return Column.invoke_expression_over_column(col, expression.Hex) @meta(unsupported_engines="postgres") def unhex(col: ColumnOrName) -> Column: + from 
sqlframe.base.function_alternatives import unhex_hex_decode_str + + session = _get_session() + + if session._is_snowflake: + return unhex_hex_decode_str(col) + return Column.invoke_expression_over_column(col, expression.Unhex) @@ -1305,6 +1929,13 @@ def octet_length(col: ColumnOrName) -> Column: @meta() def bit_length(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import bit_length_from_length + + session = _get_session() + + if session._is_bigquery: + return bit_length_from_length(col) + return Column.invoke_anonymous_function(col, "BIT_LENGTH") @@ -1326,6 +1957,19 @@ def array_agg(col: ColumnOrName) -> Column: @meta() def array_append(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + from sqlframe.base.function_alternatives import ( + array_append_list_append, + array_append_using_array_cat, + ) + + session = _get_session() + + if session._is_bigquery: + return array_append_using_array_cat(col, value) + + if session._is_duckdb: + return array_append_list_append(col, value) + value = value if isinstance(value, Column) else lit(value) return Column.invoke_anonymous_function(col, "ARRAY_APPEND", value) @@ -1388,13 +2032,21 @@ def getbit(col: ColumnOrName, pos: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "postgres"]) def create_map(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column: + session = _get_session() + cols = list(_flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols # type: ignore - return Column.invoke_expression_over_column( + result = Column.invoke_expression_over_column( None, expression.VarMap, keys=array(*cols[::2]).expression, values=array(*cols[1::2]).expression, ) + if not session._is_snowflake: + return result + + col1_dtype = col(cols[0]).dtype or "VARCHAR" + col2_dtype = col(cols[1]).dtype or "VARCHAR" + return result.cast(f"MAP({col1_dtype}, {col2_dtype})") @meta(unsupported_engines=["bigquery", "postgres", "snowflake"]) @@ -1404,14 +2056,43 @@ def 
map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column: @meta() def array_contains(col: ColumnOrName, value: ColumnOrLiteral) -> Column: - value_col = value if isinstance(value, Column) else lit(value) + from sqlframe.base.function_alternatives import array_contains_any + + session = _get_session() + lit_func = get_func_from_session("lit") + + if session._is_postgres: + return array_contains_any(col, value) + + value = value if isinstance(value, Column) else lit_func(value) + + if session._is_snowflake: + value = value.cast("variant") + return Column.invoke_expression_over_column( - col, expression.ArrayContains, expression=value_col.expression + col, expression.ArrayContains, expression=value.expression ) @meta(unsupported_engines="bigquery") def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + arrays_overlap_as_plural, + arrays_overlap_renamed, + arrays_overlap_using_intersect, + ) + + session = _get_session() + + if session._is_duckdb: + return arrays_overlap_using_intersect(col1, col2) + + if session._is_databricks or session._is_spark: + return arrays_overlap_renamed(col1, col2) + + if session._is_snowflake: + return arrays_overlap_as_plural(col1, col2) + return Column.invoke_expression_over_column(col1, expression.ArrayOverlaps, expression=col2) @@ -1419,6 +2100,27 @@ def arrays_overlap(col1: ColumnOrName, col2: ColumnOrName) -> Column: def slice( x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int] ) -> Column: + from sqlframe.base.function_alternatives import ( + slice_as_array_slice, + slice_as_list_slice, + slice_bgutil, + slice_with_brackets, + ) + + session = _get_session() + + if session._is_bigquery: + return slice_bgutil(x, start, length) + + if session._is_duckdb: + return slice_as_list_slice(x, start, length) + + if session._is_postgres: + return slice_with_brackets(x, start, length) + + if session._is_snowflake: + return 
slice_as_array_slice(x, start, length) + start_col = lit(start) if isinstance(start, int) else start length_col = lit(length) if isinstance(length, int) else length return Column.invoke_anonymous_function(x, "SLICE", start_col, length_col) @@ -1428,6 +2130,13 @@ def slice( def array_join( col: ColumnOrName, delimiter: str, null_replacement: t.Optional[str] = None ) -> Column: + session = _get_session() + + if session._is_snowflake: + if null_replacement is not None: + logger.warning("Null replacement is ignored since it is not supported in this dialect") + null_replacement = None + if null_replacement is not None: return Column.invoke_expression_over_column( col, expression.ArrayToString, expression=lit(delimiter), null=lit(null_replacement) @@ -1444,6 +2153,19 @@ def concat(*cols: ColumnOrName) -> Column: @meta() def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + from sqlframe.base.function_alternatives import ( + array_position_bgutil, + array_position_cast_variant_and_flip, + ) + + session = _get_session() + + if session._is_bigquery: + return array_position_bgutil(col, value) + + if session._is_snowflake: + return array_position_cast_variant_and_flip(col, value) + value_col = value if isinstance(value, Column) else lit(value) # Some engines return NULL if item is not found but Spark expects 0 so we coalesce to 0 return coalesce(Column.invoke_anonymous_function(col, "ARRAY_POSITION", value_col), lit(0)) @@ -1451,28 +2173,75 @@ def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column: @meta() def element_at(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + from sqlframe.base.function_alternatives import element_at_using_brackets + + session = _get_session() + + if session._is_bigquery or session._is_duckdb or session._is_postgres or session._is_snowflake: + return element_at_using_brackets(col, value) + value_col = value if isinstance(value, Column) else lit(value) return Column.invoke_anonymous_function(col, 
"ELEMENT_AT", value_col) @meta() def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column: + from sqlframe.base.function_alternatives import ( + array_remove_bgutil, + array_remove_using_filter, + ) + + session = _get_session() + + if session._is_bigquery: + return array_remove_bgutil(col, value) + + if session._is_duckdb: + return array_remove_using_filter(col, value) + value_col = value if isinstance(value, Column) else lit(value) return Column.invoke_anonymous_function(col, "ARRAY_REMOVE", value_col) @meta(unsupported_engines="postgres") def array_distinct(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import array_distinct_bgutil + + session = _get_session() + + if session._is_bigquery: + return array_distinct_bgutil(col) + return Column.invoke_anonymous_function(col, "ARRAY_DISTINCT") @meta(unsupported_engines=["bigquery", "postgres"]) def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import array_intersect_using_intersection + + session = _get_session() + + if session._is_snowflake: + return array_intersect_using_intersection(col1, col2) + return Column.invoke_anonymous_function(col1, "ARRAY_INTERSECT", Column.ensure_col(col2)) @meta(unsupported_engines=["postgres"]) def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + array_union_using_array_concat, + array_union_using_list_concat, + ) + + session = _get_session() + + if session._is_duckdb: + return array_union_using_list_concat(col1, col2) + + if session._is_bigquery or session._is_snowflake: + return array_union_using_array_concat(col1, col2) + return Column.invoke_anonymous_function(col1, "ARRAY_UNION", Column.ensure_col(col2)) @@ -1504,6 +2273,19 @@ def posexplode_outer(col: ColumnOrName) -> Column: # Snowflake doesn't support JSONPath which is what this function uses @meta(unsupported_engines="snowflake") def get_json_object(col: 
ColumnOrName, path: str) -> Column: + from sqlframe.base.function_alternatives import ( + get_json_object_using_arrow_op, + get_json_object_using_function, + ) + + session = _get_session() + + if session._is_databricks: + return get_json_object_using_function(col, path) + + if session._is_postgres: + return get_json_object_using_arrow_op(col, path) + return Column.invoke_expression_over_column(col, expression.JSONExtract, expression=lit(path)) @@ -1572,16 +2354,63 @@ def size(col: ColumnOrName) -> Column: @meta() def array_min(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + array_min_bgutil, + array_min_from_sort, + array_min_from_subquery, + ) + + session = _get_session() + + if session._is_bigquery: + return array_min_bgutil(col) + + if session._is_duckdb: + return array_min_from_sort(col) + + if session._is_postgres: + return array_min_from_subquery(col) + return Column.invoke_anonymous_function(col, "ARRAY_MIN") @meta() def array_max(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + array_max_bgutil, + array_max_from_sort, + array_max_from_subquery, + ) + + session = _get_session() + + if session._is_bigquery: + return array_max_bgutil(col) + + if session._is_duckdb: + return array_max_from_sort(col) + + if session._is_postgres: + return array_max_from_subquery(col) + return Column.invoke_anonymous_function(col, "ARRAY_MAX") @meta(unsupported_engines="postgres") def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column: + from sqlframe.base.function_alternatives import ( + sort_array_bgutil, + sort_array_using_array_sort, + ) + + session = _get_session() + + if session._is_bigquery: + return sort_array_bgutil(col, asc) + + if session._is_snowflake: + return sort_array_using_array_sort(col, asc) + if asc is not None: return Column.invoke_expression_over_column(col, expression.SortArray, asc=lit(asc)) return Column.invoke_expression_over_column(col, expression.SortArray) @@ 
-1592,6 +2421,12 @@ def array_sort( col: ColumnOrName, comparator: t.Optional[t.Union[t.Callable[[Column, Column], Column]]] = None, ) -> Column: + session = _get_session() + sort_array_func = get_func_from_session("sort_array") + + if session._is_bigquery: + return sort_array_func(col, comparator) + if comparator is not None: f_expression = _get_lambda_from_func(comparator) return Column.invoke_expression_over_column( @@ -1612,6 +2447,13 @@ def reverse(col: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "postgres"]) def flatten(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import flatten_using_array_flatten + + session = _get_session() + + if session._is_snowflake: + return flatten_using_array_flatten(col) + return Column.invoke_expression_over_column(col, expression.Flatten) @@ -1650,6 +2492,13 @@ def arrays_zip(*cols: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "duckdb", "postgres"]) def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column: + from sqlframe.base.function_alternatives import map_concat_using_map_cat + + session = _get_session() + + if session._is_snowflake: + return map_concat_using_map_cat(*cols) + columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols # type: ignore if len(columns) == 1: return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT") # type: ignore @@ -1660,6 +2509,23 @@ def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column def sequence( start: ColumnOrName, stop: ColumnOrName, step: t.Optional[ColumnOrName] = None ) -> Column: + from sqlframe.base.function_alternatives import ( + sequence_from_array_generate_range, + sequence_from_generate_array, + sequence_from_generate_series, + ) + + session = _get_session() + + if session._is_bigquery: + return sequence_from_generate_array(start, stop, step) + + if session._is_duckdb: + return sequence_from_generate_series(start, stop, step) + + if 
session._is_snowflake: + return sequence_from_array_generate_range(start, stop, step) + return Column( expression.GenerateSeries( start=Column.ensure_col(start).expression, @@ -1778,6 +2644,23 @@ def map_zip_with( @meta() def typeof(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + typeof_bgutil, + typeof_from_variant, + typeof_pg_typeof, + ) + + session = _get_session() + + if session._is_bigquery: + return typeof_bgutil(col) + + if session._is_postgres: + return typeof_pg_typeof(col) + + if session._is_snowflake: + return typeof_from_variant(col) + return Column.invoke_anonymous_function(col, "TYPEOF") @@ -1913,6 +2796,18 @@ def to_binary(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) -> Col @meta() def any_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column: + session = _get_session() + + if session._is_duckdb: + if not ignoreNulls: + logger.warning("Nulls are always ignored when using `ANY_VALUE` on this engine") + ignoreNulls = None + + if session._is_bigquery or session._is_postgres or session._is_snowflake: + if ignoreNulls: + logger.warning("Ignoring nulls is not supported in this dialect") + ignoreNulls = None + column = Column.invoke_expression_over_column(col, expression.AnyValue) if ignoreNulls: return Column(expression.IgnoreNulls(this=column.expression)) @@ -2180,6 +3075,13 @@ def current_timezone() -> Column: @meta() def current_user() -> Column: + from sqlframe.base.function_alternatives import current_user_from_session_user + + session = _get_session() + + if session._is_bigquery: + return current_user_from_session_user() + return Column.invoke_expression_over_column(None, expression.CurrentUser) @@ -2204,6 +3106,21 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column: @meta(unsupported_engines=["bigquery", "postgres", "snowflake"]) def day(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import day_with_try_to_timestamp + + 
session = _get_session() + + if session._is_duckdb: + try_to_timestamp = get_func_from_session("try_to_timestamp") + to_date = get_func_from_session("to_date") + when = get_func_from_session("when") + _is_string = get_func_from_session("_is_string") + coalesce = get_func_from_session("coalesce") + col = when( + _is_string(col), + coalesce(try_to_timestamp(col), to_date(col)), + ).otherwise(col) + return Column.invoke_expression_over_column(col, expression.Day) @@ -2222,6 +3139,19 @@ def elt(*inputs: ColumnOrName) -> Column: @meta() def endswith(str: ColumnOrName, suffix: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + endswith_using_like, + endswith_with_underscore, + ) + + session = _get_session() + + if session._is_bigquery or session._is_duckdb: + return endswith_with_underscore(str, suffix) + + if session._is_postgres: + return endswith_using_like(str, suffix) + return Column.invoke_anonymous_function(str, "endswith", suffix) @@ -2237,6 +3167,11 @@ def every(col: ColumnOrName) -> Column: @meta() def extract(field: ColumnOrName, source: ColumnOrName) -> Column: + session = _get_session() + + if session._is_bigquery: + field = expression.Var(this=Column.ensure_col(field).alias_or_name) # type: ignore + return Column.invoke_expression_over_column(field, expression.Extract, expression=source) @@ -2963,6 +3898,11 @@ def left(str: ColumnOrName, len: ColumnOrName) -> Column: >>> df.select(left(df.a, df.b).alias('r')).collect() [Row(r='Spa')] """ + session = _get_session() + + if session._is_postgres: + len = Column.ensure_col(len).cast("integer") + return Column.invoke_expression_over_column(str, expression.Left, expression=len) @@ -3853,6 +4793,16 @@ def position( | 4| +-----------------+ """ + from sqlframe.base.function_alternatives import position_as_strpos + + session = _get_session() + + if session._is_bigquery: + return position_as_strpos(substr, str, start) + + if session._is_postgres: + start = 
Column.ensure_col(start).cast("integer") if start else None + if start is not None: return Column.invoke_expression_over_column( str, expression.StrPosition, substr=substr, position=start @@ -4038,6 +4988,13 @@ def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column: | true| +-------------------+ """ + from sqlframe.base.function_alternatives import regexp_extract_only_one_group + + session = _get_session() + + if session._is_bigquery: + return regexp_extract_only_one_group(str, regexp) # type: ignore + return Column.invoke_anonymous_function(str, "regexp", regexp) @@ -4575,6 +5532,11 @@ def right(str: ColumnOrName, len: ColumnOrName) -> Column: >>> df.select(right(df.a, df.b).alias('r')).collect() [Row(r='SQL')] """ + session = _get_session() + + if session._is_postgres: + len = Column.ensure_col(len).cast("integer") + return Column.invoke_expression_over_column(str, expression.Right, expression=len) @@ -5030,6 +5992,13 @@ def to_number(col: ColumnOrName, format: ColumnOrName) -> Column: >>> df.select(to_number(df.e, lit("$99.99")).alias('r')).collect() [Row(r=Decimal('78.12'))] """ + from sqlframe.base.function_alternatives import to_number_using_to_double + + session = _get_session() + + if session._is_snowflake: + return to_number_using_to_double(col, format) + return Column.invoke_expression_over_column(col, expression.ToNumber, format=format) @@ -5149,11 +6118,14 @@ def to_unix_timestamp( [Row(r=None)] >>> spark.conf.unset("spark.sql.session.timeZone") """ - from sqlframe.base.session import _BaseSession + session = _get_session() + + if session._is_duckdb: + format = format or _BaseSession().default_time_format if format is not None: return Column.invoke_expression_over_column( - timestamp, expression.StrToUnix, format=_BaseSession().format_time(format) + timestamp, expression.StrToUnix, format=session.format_time(format) ) else: return Column.invoke_expression_over_column(timestamp, expression.StrToUnix) @@ -5306,6 +6278,17 @@ def try_element_at(col: 
ColumnOrName, extraction: ColumnOrName) -> Column: >>> df.select(try_element_at(df.data, lit("a")).alias('r')).collect() [Row(r=1.0)] """ + session = _get_session() + + if session._is_databricks or session._is_duckdb or session._is_postgres or session._is_spark: + lit = get_func_from_session("lit") + extraction = Column.ensure_col(extraction) + if ( + isinstance(extraction.expression, expression.Literal) + and extraction.expression.is_number + ): + extraction = extraction - lit(1) + return Column( expression.Bracket( this=Column.ensure_col(col).expression, @@ -5340,12 +6323,27 @@ def try_to_timestamp(col: ColumnOrName, format: t.Optional[ColumnOrName] = None) >>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect() [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] """ - from sqlframe.base.session import _BaseSession + from sqlframe.base.function_alternatives import ( + try_to_timestamp_pgtemp, + try_to_timestamp_safe, + try_to_timestamp_strptime, + ) + + session = _get_session() + + if session._is_bigquery: + return try_to_timestamp_safe(col, format) + + if session._is_duckdb: + return try_to_timestamp_strptime(col, format) + + if session._is_postgres: + return try_to_timestamp_pgtemp(col, format) return Column.invoke_anonymous_function( col, "try_to_timestamp", - _BaseSession().format_execution_time(format), # type: ignore + session.format_execution_time(format), # type: ignore ) @@ -5841,6 +6839,27 @@ def years(col: ColumnOrName) -> Column: # SQLFrame specific @meta() def _is_string(col: ColumnOrName) -> Column: + from sqlframe.base.function_alternatives import ( + _is_string_using_typeof_char_varying, + _is_string_using_typeof_string, + _is_string_using_typeof_string_lcase, + _is_string_using_typeof_varchar, + ) + + session = _get_session() + + if session._is_bigquery: + return _is_string_using_typeof_string(col) + + if session._is_duckdb: + return _is_string_using_typeof_varchar(col) + + if session._is_postgres: + return 
_is_string_using_typeof_char_varying(col) + + if session._is_databricks or session._is_spark: + return _is_string_using_typeof_string_lcase(col) + col = Column.invoke_anonymous_function(col, "TO_VARIANT") return Column.invoke_anonymous_function(col, "IS_VARCHAR") diff --git a/sqlframe/base/session.py b/sqlframe/base/session.py index a74a2a5..bac30c3 100644 --- a/sqlframe/base/session.py +++ b/sqlframe/base/session.py @@ -583,6 +583,38 @@ def _to_row(cls, columns: t.List[str], values: t.Iterable[t.Any]) -> Row: converted_values.append(cls._to_value(value)) return _create_row(columns, converted_values) + @property + def _is_bigquery(self) -> bool: + return False + + @property + def _is_databricks(self) -> bool: + return False + + @property + def _is_duckdb(self) -> bool: + return False + + @property + def _is_postgres(self) -> bool: + return False + + @property + def _is_redshift(self) -> bool: + return False + + @property + def _is_snowflake(self) -> bool: + return False + + @property + def _is_spark(self) -> bool: + return False + + @property + def _is_standalone(self) -> bool: + return False + class Builder: SQLFRAME_INPUT_DIALECT_KEY = "sqlframe.input.dialect" SQLFRAME_OUTPUT_DIALECT_KEY = "sqlframe.output.dialect" diff --git a/sqlframe/bigquery/functions.py b/sqlframe/bigquery/functions.py index 0219ad4..fcf1c7f 100644 --- a/sqlframe/bigquery/functions.py +++ b/sqlframe/bigquery/functions.py @@ -2,17 +2,8 @@ import inspect import sys -import typing as t -from sqlglot import exp as sqlglot_expression - -from sqlframe.base.util import ( - get_func_from_session, -) -from sqlframe.bigquery.column import Column - -if t.TYPE_CHECKING: - from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName +import sqlframe.base.functions # noqa module = sys.modules["sqlframe.base.functions"] globals().update( @@ -24,354 +15,3 @@ and "*" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as 
any_value, - current_user_from_session_user as current_user, - e_literal as e, - expm1_from_exp as expm1, - extract_convert_to_var as extract, - factorial_from_case_statement as factorial, - log1p_from_log as log1p, - rint_from_round as rint, - collect_set_from_list_distinct as collect_set, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - percentile_approx_without_accuracy_and_plural as percentile_approx, - rand_no_seed as rand, - year_from_extract as year, - quarter_from_extract as quarter, - month_from_extract as month, - dayofweek_from_extract as dayofweek, - dayofmonth_from_extract_with_day as dayofmonth, - dayofyear_from_extract as dayofyear, - hour_from_extract as hour, - minute_from_extract as minute, - second_from_extract as second, - weekofyear_from_extract_as_isoweek as weekofyear, - make_date_from_date_func as make_date, - to_date_from_timestamp as to_date, - last_day_with_cast as last_day, - sha1_force_sha1_and_to_hex as sha, - sha1_force_sha1_and_to_hex as sha1, - hash_from_farm_fingerprint as hash, - base64_from_blob as base64, - concat_ws_from_array_to_string as concat_ws, - format_string_with_format as format_string, - instr_using_strpos as instr, - overlay_from_substr as overlay, - split_with_split as split, - regexp_extract_only_one_group as regexp_extract, - hex_casted_as_bytes as hex, - bit_length_from_length as bit_length, - element_at_using_brackets as element_at, - array_union_using_array_concat as array_union, - sequence_from_generate_array as sequence, - position_as_strpos as position, - try_to_timestamp_safe as try_to_timestamp, - _is_string_using_typeof_string as _is_string, - array_append_using_array_cat as array_append, - endswith_with_underscore as endswith, - to_timestamp_just_timestamp as to_timestamp, -) - - -def typeof(col: ColumnOrName) -> Column: - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.typeof", expressions=[Column.ensure_col(col).expression] - ) - ) - - -def degrees(col: ColumnOrName) -> 
Column: - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.degrees", expressions=[Column.ensure_col(col).expression] - ) - ) - - -def radians(col: ColumnOrName) -> Column: - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.radians", expressions=[Column.ensure_col(col).expression] - ) - ) - - -def bround(col: ColumnOrName, scale: t.Optional[int] = None) -> Column: - from sqlframe.base.session import _BaseSession - - lit = get_func_from_session("lit", _BaseSession()) - - expressions = [Column.ensure_col(col).cast("bignumeric").expression] - if scale is not None: - expressions.append(lit(scale).expression) - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_round_half_even", - expressions=expressions, - ) - ) - - -def months_between( - date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = None -) -> Column: - roundOff = True if roundOff is None else roundOff - round = get_func_from_session("round") - lit = get_func_from_session("lit") - - value = Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_months_between", - expressions=[ - Column.ensure_col(date1).cast("datetime").expression, - Column.ensure_col(date2).cast("datetime").expression, - ], - ) - ) - if roundOff: - value = round(value, lit(8)) - return value - - -def next_day(col: ColumnOrName, dayOfWeek: str) -> Column: - lit = get_func_from_session("lit") - - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_next_day", - expressions=[Column.ensure_col(col).cast("date").expression, lit(dayOfWeek).expression], - ) - ) - - -def from_unixtime(col: ColumnOrName, format: t.Optional[str] = None) -> Column: - from sqlframe.base.session import _BaseSession - - session: _BaseSession = _BaseSession() - - expressions = [Column.ensure_col(col).expression] - return Column( - sqlglot_expression.Anonymous( - this="FORMAT_TIMESTAMP", - expressions=[ - session.format_time(format), - Column( - 
sqlglot_expression.Anonymous(this="TIMESTAMP_SECONDS", expressions=expressions) - ).expression, - ], - ) - ) - - -def unix_timestamp( - timestamp: t.Optional[ColumnOrName] = None, format: t.Optional[str] = None -) -> Column: - from sqlframe.base.session import _BaseSession - - lit = get_func_from_session("lit") - return Column( - sqlglot_expression.Anonymous( - this="UNIX_SECONDS", - expressions=[ - sqlglot_expression.Anonymous( - this="PARSE_TIMESTAMP", - expressions=[ - _BaseSession().format_time(format), - Column.ensure_col(timestamp).expression, - lit("UTC").expression, - ], - ) - ], - ) - ) - - -def format_number(col: ColumnOrName, d: int) -> Column: - round = get_func_from_session("round") - lit = get_func_from_session("lit") - - return Column( - sqlglot_expression.Anonymous( - this="FORMAT", - expressions=[ - lit(f"%'.{d}f").expression, - round(Column.ensure_col(col).cast("float"), d).expression, - ], - ) - ) - - -def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: - lit = get_func_from_session("lit") - - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_substring_index", - expressions=[ - Column.ensure_col(str).expression, - lit(delim).expression, - lit(count).expression, - ], - ) - ) - - -def bin(col: ColumnOrName) -> Column: - return ( - Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.to_binary", - expressions=[Column.ensure_col(col).expression], - ) - ) - .cast("int") - .cast("string") - ) - - -def slice( - x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int] -) -> Column: - lit = get_func_from_session("lit") - - start_col = start if isinstance(start, Column) else lit(start) - length_col = length if isinstance(length, Column) else lit(length) - - subquery = ( - sqlglot_expression.select( - sqlglot_expression.column("x"), - ) - .from_( - sqlglot_expression.Unnest( - expressions=[Column.ensure_col(x).expression], - alias=sqlglot_expression.TableAlias( - 
columns=[sqlglot_expression.to_identifier("x")], - ), - offset=sqlglot_expression.to_identifier("offset"), - ) - ) - .where( - sqlglot_expression.Between( - this=sqlglot_expression.column("offset"), - low=(start_col - lit(1)).expression, - high=(start_col + length_col).expression, - ) - ) - ) - - return Column( - sqlglot_expression.Anonymous( - this="ARRAY", - expressions=[subquery], - ) - ) - - -def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column: - lit = get_func_from_session("lit") - - value_col = value if isinstance(value, Column) else lit(value) - - return Column( - sqlglot_expression.Coalesce( - this=sqlglot_expression.Anonymous( - this="bqutil.fn.find_in_set", - expressions=[ - value_col.expression, - sqlglot_expression.Anonymous( - this="ARRAY_TO_STRING", - expressions=[Column.ensure_col(col).expression, lit(",").expression], - ), - ], - ), - expressions=[lit(0).expression], - ) - ) - - -def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column: - lit = get_func_from_session("lit") - - value_col = value if isinstance(value, Column) else lit(value) - - filter_subquery = sqlglot_expression.select( - "*", - ).from_( - sqlglot_expression.Unnest( - expressions=[Column.ensure_col(col).expression], - alias=sqlglot_expression.TableAlias( - columns=[sqlglot_expression.to_identifier("x")], - ), - ) - ) - - agg_subquery = ( - sqlglot_expression.select( - sqlglot_expression.Anonymous( - this="ARRAY_AGG", - expressions=[sqlglot_expression.column("x")], - ), - ) - .from_(filter_subquery.subquery("t")) - .where( - sqlglot_expression.NEQ( - this=sqlglot_expression.column("x", "t"), - expression=value_col.expression, - ) - ) - ) - - return Column(agg_subquery.subquery()) - - -def array_distinct(col: ColumnOrName) -> Column: - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_array_distinct", - expressions=[Column.ensure_col(col).expression], - ) - ) - - -def array_min(col: ColumnOrName) -> Column: - return Column( - 
sqlglot_expression.Anonymous( - this="bqutil.fn.cw_array_min", - expressions=[Column.ensure_col(col).expression], - ) - ) - - -def array_max(col: ColumnOrName) -> Column: - return Column( - sqlglot_expression.Anonymous( - this="bqutil.fn.cw_array_max", - expressions=[Column.ensure_col(col).expression], - ) - ) - - -def sort_array(col: ColumnOrName, asc: t.Optional[bool] = None) -> Column: - order = "ASC" if asc or asc is None else "DESC" - subquery = ( - sqlglot_expression.select("x") - .from_( - sqlglot_expression.Unnest( - expressions=[Column.ensure_col(col).expression], - alias=sqlglot_expression.TableAlias( - columns=[sqlglot_expression.to_identifier("x")], - ), - ) - ) - .order_by(f"x {order}") - ) - - return Column(sqlglot_expression.Anonymous(this="ARRAY", expressions=[subquery])) - - -array_sort = sort_array diff --git a/sqlframe/bigquery/functions.pyi b/sqlframe/bigquery/functions.pyi index 7a16234..fd5c775 100644 --- a/sqlframe/bigquery/functions.pyi +++ b/sqlframe/bigquery/functions.pyi @@ -1,138 +1,21 @@ -import typing as t - -from sqlframe.base.column import Column as Column -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as any_value, -) -from sqlframe.base.function_alternatives import ( - array_union_using_array_concat as array_union, -) -from sqlframe.base.function_alternatives import ( - base64_from_blob as base64, -) -from sqlframe.base.function_alternatives import ( - bit_length_from_length as bit_length, -) -from sqlframe.base.function_alternatives import ( - collect_set_from_list_distinct as collect_set, -) -from sqlframe.base.function_alternatives import ( - concat_ws_from_array_to_string as concat_ws, -) -from sqlframe.base.function_alternatives import ( - current_user_from_session_user as current_user, -) -from sqlframe.base.function_alternatives import ( - dayofmonth_from_extract_with_day as dayofmonth, -) -from sqlframe.base.function_alternatives import ( - dayofweek_from_extract as 
dayofweek, -) -from sqlframe.base.function_alternatives import ( - dayofyear_from_extract as dayofyear, -) -from sqlframe.base.function_alternatives import ( - e_literal as e, -) -from sqlframe.base.function_alternatives import ( - element_at_using_brackets as element_at, -) -from sqlframe.base.function_alternatives import ( - expm1_from_exp as expm1, -) -from sqlframe.base.function_alternatives import ( - extract_convert_to_var as extract, -) -from sqlframe.base.function_alternatives import ( - factorial_from_case_statement as factorial, -) -from sqlframe.base.function_alternatives import ( - format_string_with_format as format_string, -) -from sqlframe.base.function_alternatives import ( - hash_from_farm_fingerprint as hash, -) -from sqlframe.base.function_alternatives import ( - hex_casted_as_bytes as hex, -) -from sqlframe.base.function_alternatives import ( - hour_from_extract as hour, -) -from sqlframe.base.function_alternatives import ( - instr_using_strpos as instr, -) -from sqlframe.base.function_alternatives import ( - isnull_using_equal as isnull, -) -from sqlframe.base.function_alternatives import ( - last_day_with_cast as last_day, -) -from sqlframe.base.function_alternatives import ( - log1p_from_log as log1p, -) -from sqlframe.base.function_alternatives import ( - make_date_from_date_func as make_date, -) -from sqlframe.base.function_alternatives import ( - minute_from_extract as minute, -) -from sqlframe.base.function_alternatives import ( - month_from_extract as month, -) -from sqlframe.base.function_alternatives import ( - nanvl_as_case as nanvl, -) -from sqlframe.base.function_alternatives import ( - overlay_from_substr as overlay, -) -from sqlframe.base.function_alternatives import ( - percentile_approx_without_accuracy_and_plural as percentile_approx, -) -from sqlframe.base.function_alternatives import ( - position_as_strpos as position, -) -from sqlframe.base.function_alternatives import ( - quarter_from_extract as quarter, -) -from 
sqlframe.base.function_alternatives import ( - rand_no_seed as rand, -) -from sqlframe.base.function_alternatives import ( - regexp_extract_only_one_group as regexp_extract, -) -from sqlframe.base.function_alternatives import ( - rint_from_round as rint, -) -from sqlframe.base.function_alternatives import ( - second_from_extract as second, -) -from sqlframe.base.function_alternatives import ( - sequence_from_generate_array as sequence, -) -from sqlframe.base.function_alternatives import ( - sha1_force_sha1_and_to_hex as sha1, -) -from sqlframe.base.function_alternatives import ( - split_with_split as split, -) -from sqlframe.base.function_alternatives import ( - to_date_from_timestamp as to_date, -) -from sqlframe.base.function_alternatives import ( - weekofyear_from_extract_as_isoweek as weekofyear, -) -from sqlframe.base.function_alternatives import ( - year_from_extract as year, -) from sqlframe.base.functions import abs as abs from sqlframe.base.functions import acos as acos from sqlframe.base.functions import acosh as acosh from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import any_value as any_value from sqlframe.base.functions import approx_count_distinct as approx_count_distinct from sqlframe.base.functions import approxCountDistinct as approxCountDistinct from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_append as array_append from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_distinct as array_distinct from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import array_sort as array_sort +from sqlframe.base.functions import 
array_union as array_union from sqlframe.base.functions import asc as asc from sqlframe.base.functions import asc_nulls_first as asc_nulls_first from sqlframe.base.functions import asc_nulls_last as asc_nulls_last @@ -143,10 +26,14 @@ from sqlframe.base.functions import atan as atan from sqlframe.base.functions import atan2 as atan2 from sqlframe.base.functions import atanh as atanh from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from sqlframe.base.functions import bin as bin +from sqlframe.base.functions import bit_length as bit_length from sqlframe.base.functions import bitwise_not as bitwise_not from sqlframe.base.functions import bitwiseNOT as bitwiseNOT from sqlframe.base.functions import bool_and as bool_and from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import bround as bround from sqlframe.base.functions import call_function as call_function from sqlframe.base.functions import cbrt as cbrt from sqlframe.base.functions import ceil as ceil @@ -155,7 +42,9 @@ from sqlframe.base.functions import char as char from sqlframe.base.functions import coalesce as coalesce from sqlframe.base.functions import col as col from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws from sqlframe.base.functions import corr as corr from sqlframe.base.functions import cos as cos from sqlframe.base.functions import cosh as cosh @@ -168,6 +57,7 @@ from sqlframe.base.functions import csc as csc from sqlframe.base.functions import cume_dist as cume_dist from sqlframe.base.functions import current_date as current_date from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_user as current_user from sqlframe.base.functions import date_add as date_add 
from sqlframe.base.functions import date_diff as date_diff from sqlframe.base.functions import date_format as date_format @@ -175,22 +65,41 @@ from sqlframe.base.functions import date_sub as date_sub from sqlframe.base.functions import date_trunc as date_trunc from sqlframe.base.functions import dateadd as dateadd from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import dayofmonth as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import degrees as degrees from sqlframe.base.functions import dense_rank as dense_rank from sqlframe.base.functions import desc as desc from sqlframe.base.functions import desc_nulls_first as desc_nulls_first from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import endswith as endswith from sqlframe.base.functions import exp as exp from sqlframe.base.functions import explode as explode from sqlframe.base.functions import explode_outer as explode_outer +from sqlframe.base.functions import expm1 as expm1 from sqlframe.base.functions import expr as expr +from sqlframe.base.functions import extract as extract +from sqlframe.base.functions import factorial as factorial from sqlframe.base.functions import floor as floor +from sqlframe.base.functions import format_number as format_number +from sqlframe.base.functions import format_string as format_string +from sqlframe.base.functions import from_unixtime as from_unixtime from sqlframe.base.functions import get_json_object as get_json_object from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import hash as hash +from sqlframe.base.functions import hex as hex +from sqlframe.base.functions import hour as hour from sqlframe.base.functions import ifnull as ifnull 
from sqlframe.base.functions import initcap as initcap from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnull as isnull from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last_day as last_day from sqlframe.base.functions import lcase as lcase from sqlframe.base.functions import lead as lead from sqlframe.base.functions import least as least @@ -199,17 +108,24 @@ from sqlframe.base.functions import length as length from sqlframe.base.functions import lit as lit from sqlframe.base.functions import ln as ln from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p from sqlframe.base.functions import log2 as log2 from sqlframe.base.functions import log10 as log10 from sqlframe.base.functions import lower as lower from sqlframe.base.functions import lpad as lpad from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date from sqlframe.base.functions import max as max from sqlframe.base.functions import max_by as max_by from sqlframe.base.functions import md5 as md5 from sqlframe.base.functions import mean as mean from sqlframe.base.functions import min as min from sqlframe.base.functions import min_by as min_by +from sqlframe.base.functions import minute as minute +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import next_day as next_day from sqlframe.base.functions import now as now from sqlframe.base.functions import nth_value as nth_value from sqlframe.base.functions import ntile as ntile @@ -217,24 +133,35 @@ from sqlframe.base.functions import nullif as nullif from sqlframe.base.functions import nvl as nvl from 
sqlframe.base.functions import nvl2 as nvl2 from sqlframe.base.functions import octet_length as octet_length +from sqlframe.base.functions import overlay as overlay from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile_approx as percentile_approx from sqlframe.base.functions import posexplode as posexplode from sqlframe.base.functions import posexplode_outer as posexplode_outer +from sqlframe.base.functions import position as position from sqlframe.base.functions import pow as pow from sqlframe.base.functions import power as power +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import rand as rand from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import regexp_extract as regexp_extract from sqlframe.base.functions import regexp_like as regexp_like from sqlframe.base.functions import regexp_replace as regexp_replace from sqlframe.base.functions import repeat as repeat from sqlframe.base.functions import reverse as reverse from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint from sqlframe.base.functions import rlike as rlike from sqlframe.base.functions import round as round from sqlframe.base.functions import row_number as row_number from sqlframe.base.functions import rpad as rpad from sqlframe.base.functions import rtrim as rtrim from sqlframe.base.functions import sec as sec +from sqlframe.base.functions import second as second +from sqlframe.base.functions import sequence as sequence from sqlframe.base.functions import sha as sha +from sqlframe.base.functions import sha1 as sha1 from sqlframe.base.functions import shiftLeft as shiftLeft from sqlframe.base.functions import shiftleft as shiftleft from sqlframe.base.functions import shiftRight as shiftRight @@ -244,7 +171,10 @@ from sqlframe.base.functions import signum as signum from 
sqlframe.base.functions import sin as sin from sqlframe.base.functions import sinh as sinh from sqlframe.base.functions import size as size +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import sort_array as sort_array from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import split as split from sqlframe.base.functions import sqrt as sqrt from sqlframe.base.functions import startswith as startswith from sqlframe.base.functions import stddev as stddev @@ -253,54 +183,31 @@ from sqlframe.base.functions import stddev_samp as stddev_samp from sqlframe.base.functions import struct as struct from sqlframe.base.functions import substr as substr from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import substring_index as substring_index from sqlframe.base.functions import sum as sum from sqlframe.base.functions import sum_distinct as sum_distinct from sqlframe.base.functions import sumDistinct as sumDistinct from sqlframe.base.functions import tan as tan from sqlframe.base.functions import tanh as tanh from sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_date as to_date from sqlframe.base.functions import to_timestamp as to_timestamp from sqlframe.base.functions import toDegrees as toDegrees from sqlframe.base.functions import toRadians as toRadians from sqlframe.base.functions import translate as translate from sqlframe.base.functions import trim as trim from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof from sqlframe.base.functions import ucase as ucase from sqlframe.base.functions import unbase64 as unbase64 from sqlframe.base.functions import unhex as unhex +from sqlframe.base.functions import unix_timestamp as unix_timestamp from sqlframe.base.functions import upper as upper 
from sqlframe.base.functions import user as user from sqlframe.base.functions import var_pop as var_pop from sqlframe.base.functions import var_samp as var_samp from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import weekofyear as weekofyear from sqlframe.base.functions import when as when -from sqlframe.base.util import get_func_from_session as get_func_from_session - -if t.TYPE_CHECKING: - from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName - -def array_distinct(col: ColumnOrName) -> Column: ... -def array_max(col: ColumnOrName) -> Column: ... -def array_min(col: ColumnOrName) -> Column: ... -def array_position(col: ColumnOrName, value: ColumnOrLiteral) -> Column: ... -def array_remove(col: ColumnOrName, value: ColumnOrLiteral) -> Column: ... -def array_sort(col: ColumnOrName, asc: t.Optional[bool] = ...) -> Column: ... -def bin(col: ColumnOrName) -> Column: ... -def bround(col: ColumnOrName, scale: t.Optional[int] = ...) -> Column: ... -def degrees(col: ColumnOrName) -> Column: ... -def format_number(col: ColumnOrName, d: int) -> Column: ... -def from_unixtime(col: ColumnOrName, format: t.Optional[str] = ...) -> Column: ... -def months_between( - date1: ColumnOrName, date2: ColumnOrName, roundOff: t.Optional[bool] = ... -) -> Column: ... -def next_day(col: ColumnOrName, dayOfWeek: str) -> Column: ... -def radians(col: ColumnOrName) -> Column: ... -def slice( - x: ColumnOrName, start: t.Union[ColumnOrName, int], length: t.Union[ColumnOrName, int] -) -> Column: ... -def sort_array(col: ColumnOrName, asc: t.Optional[bool] = ...) -> Column: ... -def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ... -def typeof(col: ColumnOrName) -> Column: ... -def unix_timestamp( - timestamp: t.Optional[ColumnOrName] = ..., format: t.Optional[str] = ... -) -> Column: ... 
+from sqlframe.base.functions import year as year diff --git a/sqlframe/bigquery/session.py b/sqlframe/bigquery/session.py index f339ede..363d793 100644 --- a/sqlframe/bigquery/session.py +++ b/sqlframe/bigquery/session.py @@ -76,6 +76,10 @@ def default_project(self, project: str) -> None: def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]: return None + @property + def _is_bigquery(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "bigquery" diff --git a/sqlframe/databricks/functions.py b/sqlframe/databricks/functions.py index 2b613c8..c80065d 100644 --- a/sqlframe/databricks/functions.py +++ b/sqlframe/databricks/functions.py @@ -11,13 +11,3 @@ if hasattr(func, "unsupported_engines") and "databricks" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - percentile_without_disc as percentile, - add_months_by_multiplication as add_months, - arrays_overlap_renamed as arrays_overlap, - _is_string_using_typeof_string_lcase as _is_string, - try_element_at_zero_based as try_element_at, - get_json_object_using_function as get_json_object, -) diff --git a/sqlframe/databricks/functions.pyi b/sqlframe/databricks/functions.pyi index 42672f7..8d980b3 100644 --- a/sqlframe/databricks/functions.pyi +++ b/sqlframe/databricks/functions.pyi @@ -1,416 +1,408 @@ -from sqlframe.base.function_alternatives import ( # noqa - percentile_without_disc as percentile, - add_months_by_multiplication as add_months, - arrays_overlap_renamed as arrays_overlap, - try_element_at_zero_based as try_element_at, -) +from sqlframe.base.functions import abs as abs +from sqlframe.base.functions import acos as acos +from sqlframe.base.functions import acosh as acosh +from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import aes_decrypt as aes_decrypt +from sqlframe.base.functions import aes_encrypt as aes_encrypt +from sqlframe.base.functions import 
aggregate as aggregate +from sqlframe.base.functions import any_value as any_value +from sqlframe.base.functions import approx_count_distinct as approx_count_distinct +from sqlframe.base.functions import approx_percentile as approx_percentile +from sqlframe.base.functions import approxCountDistinct as approxCountDistinct +from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_agg as array_agg +from sqlframe.base.functions import array_append as array_append +from sqlframe.base.functions import array_compact as array_compact +from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_distinct as array_distinct +from sqlframe.base.functions import array_except as array_except +from sqlframe.base.functions import array_insert as array_insert +from sqlframe.base.functions import array_intersect as array_intersect +from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_prepend as array_prepend +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import array_repeat as array_repeat +from sqlframe.base.functions import array_size as array_size +from sqlframe.base.functions import array_sort as array_sort +from sqlframe.base.functions import array_union as array_union +from sqlframe.base.functions import arrays_overlap as arrays_overlap +from sqlframe.base.functions import arrays_zip as arrays_zip +from sqlframe.base.functions import asc as asc +from sqlframe.base.functions import asc_nulls_first as asc_nulls_first +from sqlframe.base.functions import asc_nulls_last as asc_nulls_last +from sqlframe.base.functions import ascii as ascii +from sqlframe.base.functions import asin as asin +from 
sqlframe.base.functions import asinh as asinh +from sqlframe.base.functions import assert_true as assert_true +from sqlframe.base.functions import atan as atan +from sqlframe.base.functions import atan2 as atan2 +from sqlframe.base.functions import atanh as atanh +from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from sqlframe.base.functions import bin as bin +from sqlframe.base.functions import bit_and as bit_and +from sqlframe.base.functions import bit_count as bit_count +from sqlframe.base.functions import bit_get as bit_get +from sqlframe.base.functions import bit_length as bit_length +from sqlframe.base.functions import bit_or as bit_or +from sqlframe.base.functions import bit_xor as bit_xor +from sqlframe.base.functions import bitmap_bit_position as bitmap_bit_position +from sqlframe.base.functions import bitmap_bucket_number as bitmap_bucket_number +from sqlframe.base.functions import bitmap_construct_agg as bitmap_construct_agg +from sqlframe.base.functions import bitmap_count as bitmap_count +from sqlframe.base.functions import bitmap_or_agg as bitmap_or_agg +from sqlframe.base.functions import bitwise_not as bitwise_not +from sqlframe.base.functions import bitwiseNOT as bitwiseNOT +from sqlframe.base.functions import bool_and as bool_and +from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import broadcast as broadcast +from sqlframe.base.functions import bround as bround +from sqlframe.base.functions import btrim as btrim +from sqlframe.base.functions import bucket as bucket +from sqlframe.base.functions import call_function as call_function +from sqlframe.base.functions import cardinality as cardinality +from sqlframe.base.functions import cbrt as cbrt +from sqlframe.base.functions import ceil as ceil +from sqlframe.base.functions import ceiling as ceiling +from sqlframe.base.functions import char as char +from sqlframe.base.functions import char_length as char_length 
+from sqlframe.base.functions import character_length as character_length +from sqlframe.base.functions import coalesce as coalesce +from sqlframe.base.functions import col as col +from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set +from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws +from sqlframe.base.functions import contains as contains +from sqlframe.base.functions import conv as conv +from sqlframe.base.functions import convert_timezone as convert_timezone +from sqlframe.base.functions import corr as corr +from sqlframe.base.functions import cos as cos +from sqlframe.base.functions import cosh as cosh +from sqlframe.base.functions import cot as cot +from sqlframe.base.functions import count as count +from sqlframe.base.functions import count_distinct as count_distinct +from sqlframe.base.functions import count_if as count_if +from sqlframe.base.functions import count_min_sketch as count_min_sketch +from sqlframe.base.functions import countDistinct as countDistinct +from sqlframe.base.functions import covar_pop as covar_pop +from sqlframe.base.functions import covar_samp as covar_samp +from sqlframe.base.functions import crc32 as crc32 +from sqlframe.base.functions import create_map as create_map +from sqlframe.base.functions import csc as csc +from sqlframe.base.functions import cume_dist as cume_dist +from sqlframe.base.functions import curdate as curdate +from sqlframe.base.functions import current_catalog as current_catalog +from sqlframe.base.functions import current_database as current_database +from sqlframe.base.functions import current_date as current_date +from sqlframe.base.functions import current_schema as current_schema +from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_user as current_user +from sqlframe.base.functions import date_add as 
date_add +from sqlframe.base.functions import date_diff as date_diff +from sqlframe.base.functions import date_format as date_format +from sqlframe.base.functions import date_from_unix_date as date_from_unix_date +from sqlframe.base.functions import date_part as date_part +from sqlframe.base.functions import date_sub as date_sub +from sqlframe.base.functions import date_trunc as date_trunc +from sqlframe.base.functions import dateadd as dateadd +from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import datepart as datepart +from sqlframe.base.functions import day as day +from sqlframe.base.functions import dayofmonth as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import days as days +from sqlframe.base.functions import decode as decode +from sqlframe.base.functions import degrees as degrees +from sqlframe.base.functions import dense_rank as dense_rank +from sqlframe.base.functions import desc as desc +from sqlframe.base.functions import desc_nulls_first as desc_nulls_first +from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import elt as elt +from sqlframe.base.functions import encode as encode +from sqlframe.base.functions import endswith as endswith +from sqlframe.base.functions import equal_null as equal_null +from sqlframe.base.functions import every as every +from sqlframe.base.functions import exists as exists +from sqlframe.base.functions import exp as exp +from sqlframe.base.functions import explode as explode +from sqlframe.base.functions import explode_outer as explode_outer +from sqlframe.base.functions import expm1 as expm1 +from sqlframe.base.functions import expr as expr +from sqlframe.base.functions import extract as extract +from 
sqlframe.base.functions import factorial as factorial +from sqlframe.base.functions import filter as filter +from sqlframe.base.functions import find_in_set as find_in_set +from sqlframe.base.functions import first as first +from sqlframe.base.functions import first_value as first_value +from sqlframe.base.functions import flatten as flatten +from sqlframe.base.functions import floor as floor +from sqlframe.base.functions import forall as forall +from sqlframe.base.functions import format_number as format_number +from sqlframe.base.functions import format_string as format_string +from sqlframe.base.functions import from_csv as from_csv +from sqlframe.base.functions import from_json as from_json +from sqlframe.base.functions import from_unixtime as from_unixtime +from sqlframe.base.functions import from_utc_timestamp as from_utc_timestamp +from sqlframe.base.functions import get as get +from sqlframe.base.functions import get_json_object as get_json_object +from sqlframe.base.functions import getbit as getbit +from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import grouping as grouping +from sqlframe.base.functions import grouping_id as grouping_id +from sqlframe.base.functions import hash as hash +from sqlframe.base.functions import hex as hex +from sqlframe.base.functions import histogram_numeric as histogram_numeric +from sqlframe.base.functions import hll_sketch_agg as hll_sketch_agg +from sqlframe.base.functions import hll_sketch_estimate as hll_sketch_estimate +from sqlframe.base.functions import hll_union as hll_union +from sqlframe.base.functions import hll_union_agg as hll_union_agg +from sqlframe.base.functions import hour as hour +from sqlframe.base.functions import hours as hours +from sqlframe.base.functions import hypot as hypot +from sqlframe.base.functions import ifnull as ifnull +from sqlframe.base.functions import ilike as ilike +from sqlframe.base.functions import initcap as initcap +from 
sqlframe.base.functions import inline as inline +from sqlframe.base.functions import inline_outer as inline_outer +from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr +from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnotnull as isnotnull +from sqlframe.base.functions import isnull as isnull +from sqlframe.base.functions import json_array_length as json_array_length +from sqlframe.base.functions import json_object_keys as json_object_keys +from sqlframe.base.functions import json_tuple as json_tuple +from sqlframe.base.functions import kurtosis as kurtosis +from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last as last +from sqlframe.base.functions import last_day as last_day +from sqlframe.base.functions import last_value as last_value +from sqlframe.base.functions import lcase as lcase +from sqlframe.base.functions import lead as lead +from sqlframe.base.functions import least as least +from sqlframe.base.functions import left as left +from sqlframe.base.functions import length as length +from sqlframe.base.functions import levenshtein as levenshtein +from sqlframe.base.functions import like as like +from sqlframe.base.functions import lit as lit +from sqlframe.base.functions import ln as ln +from sqlframe.base.functions import locate as locate +from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p +from sqlframe.base.functions import log2 as log2 +from sqlframe.base.functions import log10 as log10 +from sqlframe.base.functions import lower as lower +from sqlframe.base.functions import lpad as lpad +from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date +from sqlframe.base.functions import make_interval as make_interval +from sqlframe.base.functions import make_timestamp as make_timestamp +from sqlframe.base.functions 
import make_timestamp_ntz as make_timestamp_ntz +from sqlframe.base.functions import map_concat as map_concat +from sqlframe.base.functions import map_contains_key as map_contains_key +from sqlframe.base.functions import map_entries as map_entries +from sqlframe.base.functions import map_filter as map_filter +from sqlframe.base.functions import map_from_arrays as map_from_arrays +from sqlframe.base.functions import map_from_entries as map_from_entries +from sqlframe.base.functions import map_keys as map_keys +from sqlframe.base.functions import map_values as map_values +from sqlframe.base.functions import map_zip_with as map_zip_with +from sqlframe.base.functions import mask as mask +from sqlframe.base.functions import max as max +from sqlframe.base.functions import max_by as max_by +from sqlframe.base.functions import md5 as md5 +from sqlframe.base.functions import mean as mean +from sqlframe.base.functions import median as median +from sqlframe.base.functions import min as min +from sqlframe.base.functions import min_by as min_by +from sqlframe.base.functions import minute as minute +from sqlframe.base.functions import mode as mode from sqlframe.base.functions import ( - abs as abs, - acos as acos, - acosh as acosh, - aes_decrypt as aes_decrypt, - aes_encrypt as aes_encrypt, - aggregate as aggregate, - any_value as any_value, - approxCountDistinct as approxCountDistinct, - approx_count_distinct as approx_count_distinct, - approx_percentile as approx_percentile, - array as array, - array_agg as array_agg, - array_append as array_append, - array_compact as array_compact, - array_contains as array_contains, - array_distinct as array_distinct, - array_except as array_except, - array_insert as array_insert, - array_intersect as array_intersect, - array_join as array_join, - array_max as array_max, - array_min as array_min, - array_position as array_position, - array_prepend as array_prepend, - array_remove as array_remove, - array_repeat as array_repeat, - array_size 
as array_size, - array_sort as array_sort, - array_union as array_union, - arrays_zip as arrays_zip, - asc as asc, - asc_nulls_first as asc_nulls_first, - asc_nulls_last as asc_nulls_last, - ascii as ascii, - asin as asin, - asinh as asinh, - assert_true as assert_true, - atan as atan, - atan2 as atan2, - atanh as atanh, - avg as avg, - base64 as base64, - bin as bin, - bit_and as bit_and, - bit_count as bit_count, - bit_get as bit_get, - bit_length as bit_length, - bit_or as bit_or, - bit_xor as bit_xor, - bitmap_bit_position as bitmap_bit_position, - bitmap_bucket_number as bitmap_bucket_number, - bitmap_construct_agg as bitmap_construct_agg, - bitmap_count as bitmap_count, - bitmap_or_agg as bitmap_or_agg, - bitwiseNOT as bitwiseNOT, - bitwise_not as bitwise_not, - bool_and as bool_and, - bool_or as bool_or, - broadcast as broadcast, - bround as bround, - btrim as btrim, - bucket as bucket, - call_function as call_function, - cardinality as cardinality, - cbrt as cbrt, - ceil as ceil, - ceiling as ceiling, - char as char, - char_length as char_length, - character_length as character_length, - coalesce as coalesce, - col as col, - collect_list as collect_list, - collect_set as collect_set, - concat as concat, - concat_ws as concat_ws, - contains as contains, - conv as conv, - convert_timezone as convert_timezone, - corr as corr, - cos as cos, - cosh as cosh, - cot as cot, - count as count, - countDistinct as countDistinct, - count_distinct as count_distinct, - count_if as count_if, - count_min_sketch as count_min_sketch, - covar_pop as covar_pop, - covar_samp as covar_samp, - crc32 as crc32, - create_map as create_map, - csc as csc, - cume_dist as cume_dist, - curdate as curdate, - current_catalog as current_catalog, - current_database as current_database, - current_date as current_date, - current_schema as current_schema, - current_timestamp as current_timestamp, - current_timezone as current_timezone, - current_user as current_user, - date_add as date_add, - 
date_diff as date_diff, - date_format as date_format, - date_from_unix_date as date_from_unix_date, - date_part as date_part, - date_sub as date_sub, - date_trunc as date_trunc, - dateadd as dateadd, - datediff as datediff, - datepart as datepart, - day as day, - dayofmonth as dayofmonth, - dayofweek as dayofweek, - dayofyear as dayofyear, - days as days, - decode as decode, - degrees as degrees, - dense_rank as dense_rank, - desc as desc, - desc_nulls_first as desc_nulls_first, - desc_nulls_last as desc_nulls_last, - e as e, - element_at as element_at, - elt as elt, - encode as encode, - endswith as endswith, - equal_null as equal_null, - every as every, - exists as exists, - exp as exp, - explode as explode, - explode_outer as explode_outer, - expm1 as expm1, - expr as expr, - extract as extract, - factorial as factorial, - filter as filter, - find_in_set as find_in_set, - first as first, - first_value as first_value, - flatten as flatten, - floor as floor, - forall as forall, - format_number as format_number, - format_string as format_string, - from_csv as from_csv, - from_json as from_json, - from_unixtime as from_unixtime, - from_utc_timestamp as from_utc_timestamp, - get as get, - get_active_spark_context as get_active_spark_context, - get_json_object as get_json_object, - getbit as getbit, - greatest as greatest, - grouping as grouping, - grouping_id as grouping_id, - hash as hash, - hex as hex, - histogram_numeric as histogram_numeric, - hll_sketch_agg as hll_sketch_agg, - hll_sketch_estimate as hll_sketch_estimate, - hll_union as hll_union, - hll_union_agg as hll_union_agg, - hour as hour, - hours as hours, - hypot as hypot, - ifnull as ifnull, - ilike as ilike, - initcap as initcap, - inline as inline, - inline_outer as inline_outer, - input_file_name as input_file_name, - instr as instr, - isnan as isnan, - isnotnull as isnotnull, - isnull as isnull, - java_method as java_method, - json_array_length as json_array_length, - json_object_keys as 
json_object_keys, - json_tuple as json_tuple, - kurtosis as kurtosis, - lag as lag, - last as last, - last_day as last_day, - last_value as last_value, - lcase as lcase, - lead as lead, - least as least, - left as left, - length as length, - levenshtein as levenshtein, - like as like, - lit as lit, - ln as ln, - localtimestamp as localtimestamp, - locate as locate, - log as log, - log10 as log10, - log1p as log1p, - log2 as log2, - lower as lower, - lpad as lpad, - ltrim as ltrim, - make_date as make_date, - make_dt_interval as make_dt_interval, - make_interval as make_interval, - make_timestamp as make_timestamp, - make_timestamp_ltz as make_timestamp_ltz, - make_timestamp_ntz as make_timestamp_ntz, - make_ym_interval as make_ym_interval, - map_concat as map_concat, - map_contains_key as map_contains_key, - map_entries as map_entries, - map_filter as map_filter, - map_from_arrays as map_from_arrays, - map_from_entries as map_from_entries, - map_keys as map_keys, - map_values as map_values, - map_zip_with as map_zip_with, - mask as mask, - max as max, - max_by as max_by, - md5 as md5, - mean as mean, - median as median, - min as min, - min_by as min_by, - minute as minute, - mode as mode, monotonically_increasing_id as monotonically_increasing_id, - month as month, - months as months, - months_between as months_between, - named_struct as named_struct, - nanvl as nanvl, - negate as negate, - negative as negative, - next_day as next_day, - now as now, - nth_value as nth_value, - ntile as ntile, - nullif as nullif, - nvl as nvl, - nvl2 as nvl2, - octet_length as octet_length, - overlay as overlay, - parse_url as parse_url, - percent_rank as percent_rank, - percentile_approx as percentile_approx, - pi as pi, - pmod as pmod, - posexplode as posexplode, - posexplode_outer as posexplode_outer, - position as position, - positive as positive, - pow as pow, - power as power, - printf as printf, - quarter as quarter, - radians as radians, - raise_error as raise_error, - rand 
as rand, - randn as randn, - rank as rank, - reduce as reduce, - reflect as reflect, - regexp as regexp, - regexp_count as regexp_count, - regexp_extract as regexp_extract, - regexp_extract_all as regexp_extract_all, - regexp_instr as regexp_instr, - regexp_like as regexp_like, - regexp_replace as regexp_replace, - regexp_substr as regexp_substr, - regr_avgx as regr_avgx, - regr_avgy as regr_avgy, - regr_count as regr_count, - regr_intercept as regr_intercept, - regr_r2 as regr_r2, - regr_slope as regr_slope, - regr_sxx as regr_sxx, - regr_sxy as regr_sxy, - regr_syy as regr_syy, - repeat as repeat, - replace as replace, - reverse as reverse, - right as right, - rint as rint, - rlike as rlike, - round as round, - row_number as row_number, - rpad as rpad, - rtrim as rtrim, - schema_of_csv as schema_of_csv, - schema_of_json as schema_of_json, - sec as sec, - second as second, - sentences as sentences, - sequence as sequence, - sha as sha, - sha1 as sha1, - sha2 as sha2, - shiftLeft as shiftLeft, - shiftRight as shiftRight, - shiftRightUnsigned as shiftRightUnsigned, - shiftleft as shiftleft, - shiftright as shiftright, - shiftrightunsigned as shiftrightunsigned, - shuffle as shuffle, - sign as sign, - signum as signum, - sin as sin, - sinh as sinh, - size as size, - skewness as skewness, - slice as slice, - some as some, - sort_array as sort_array, - soundex as soundex, - spark_partition_id as spark_partition_id, - split as split, - split_part as split_part, - sqrt as sqrt, - stack as stack, - startswith as startswith, - std as std, - stddev as stddev, - stddev_pop as stddev_pop, - stddev_samp as stddev_samp, - str_to_map as str_to_map, - struct as struct, - substr as substr, - substring as substring, - substring_index as substring_index, - sum as sum, - sumDistinct as sumDistinct, - sum_distinct as sum_distinct, - tan as tan, - tanh as tanh, - timestamp_micros as timestamp_micros, - timestamp_millis as timestamp_millis, - timestamp_seconds as timestamp_seconds, - 
toDegrees as toDegrees, - toRadians as toRadians, - to_binary as to_binary, - to_char as to_char, - to_csv as to_csv, - to_date as to_date, - to_json as to_json, - to_number as to_number, - to_timestamp as to_timestamp, - to_timestamp_ltz as to_timestamp_ltz, - to_timestamp_ntz as to_timestamp_ntz, - to_unix_timestamp as to_unix_timestamp, - to_utc_timestamp as to_utc_timestamp, - to_varchar as to_varchar, - transform as transform, - transform_keys as transform_keys, - transform_values as transform_values, - translate as translate, - trim as trim, - trunc as trunc, - try_add as try_add, - try_aes_decrypt as try_aes_decrypt, - try_avg as try_avg, - try_divide as try_divide, - try_multiply as try_multiply, - try_subtract as try_subtract, - try_sum as try_sum, - try_to_binary as try_to_binary, - try_to_number as try_to_number, - try_to_timestamp as try_to_timestamp, - typeof as typeof, - ucase as ucase, - unbase64 as unbase64, - unhex as unhex, - unix_date as unix_date, - unix_micros as unix_micros, - unix_millis as unix_millis, - unix_seconds as unix_seconds, - unix_timestamp as unix_timestamp, - upper as upper, - url_decode as url_decode, - url_encode as url_encode, - user as user, - var_pop as var_pop, - var_samp as var_samp, - variance as variance, - version as version, - weekday as weekday, - weekofyear as weekofyear, - when as when, - width_bucket as width_bucket, - xpath as xpath, - xpath_boolean as xpath_boolean, - xpath_double as xpath_double, - xpath_float as xpath_float, - xpath_int as xpath_int, - xpath_long as xpath_long, - xpath_number as xpath_number, - xpath_short as xpath_short, - xpath_string as xpath_string, - xxhash64 as xxhash64, - year as year, - years as years, - zip_with as zip_with, ) +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months as months +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import named_struct as named_struct +from 
sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import negate as negate +from sqlframe.base.functions import negative as negative +from sqlframe.base.functions import next_day as next_day +from sqlframe.base.functions import now as now +from sqlframe.base.functions import nth_value as nth_value +from sqlframe.base.functions import ntile as ntile +from sqlframe.base.functions import nullif as nullif +from sqlframe.base.functions import nvl as nvl +from sqlframe.base.functions import nvl2 as nvl2 +from sqlframe.base.functions import octet_length as octet_length +from sqlframe.base.functions import overlay as overlay +from sqlframe.base.functions import parse_url as parse_url +from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile as percentile +from sqlframe.base.functions import percentile_approx as percentile_approx +from sqlframe.base.functions import pi as pi +from sqlframe.base.functions import pmod as pmod +from sqlframe.base.functions import posexplode as posexplode +from sqlframe.base.functions import posexplode_outer as posexplode_outer +from sqlframe.base.functions import position as position +from sqlframe.base.functions import positive as positive +from sqlframe.base.functions import pow as pow +from sqlframe.base.functions import power as power +from sqlframe.base.functions import printf as printf +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import raise_error as raise_error +from sqlframe.base.functions import rand as rand +from sqlframe.base.functions import randn as randn +from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import reduce as reduce +from sqlframe.base.functions import regexp as regexp +from sqlframe.base.functions import regexp_count as regexp_count +from sqlframe.base.functions import regexp_extract as regexp_extract +from 
sqlframe.base.functions import regexp_extract_all as regexp_extract_all +from sqlframe.base.functions import regexp_instr as regexp_instr +from sqlframe.base.functions import regexp_like as regexp_like +from sqlframe.base.functions import regexp_replace as regexp_replace +from sqlframe.base.functions import regexp_substr as regexp_substr +from sqlframe.base.functions import regr_avgx as regr_avgx +from sqlframe.base.functions import regr_avgy as regr_avgy +from sqlframe.base.functions import regr_count as regr_count +from sqlframe.base.functions import regr_intercept as regr_intercept +from sqlframe.base.functions import regr_r2 as regr_r2 +from sqlframe.base.functions import regr_slope as regr_slope +from sqlframe.base.functions import regr_sxx as regr_sxx +from sqlframe.base.functions import regr_sxy as regr_sxy +from sqlframe.base.functions import regr_syy as regr_syy +from sqlframe.base.functions import repeat as repeat +from sqlframe.base.functions import replace as replace +from sqlframe.base.functions import reverse as reverse +from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint +from sqlframe.base.functions import rlike as rlike +from sqlframe.base.functions import round as round +from sqlframe.base.functions import row_number as row_number +from sqlframe.base.functions import rpad as rpad +from sqlframe.base.functions import rtrim as rtrim +from sqlframe.base.functions import schema_of_csv as schema_of_csv +from sqlframe.base.functions import schema_of_json as schema_of_json +from sqlframe.base.functions import sec as sec +from sqlframe.base.functions import second as second +from sqlframe.base.functions import sentences as sentences +from sqlframe.base.functions import sequence as sequence +from sqlframe.base.functions import session_window as session_window +from sqlframe.base.functions import sha as sha +from sqlframe.base.functions import sha1 as sha1 +from sqlframe.base.functions import sha2 as sha2 
+from sqlframe.base.functions import shiftLeft as shiftLeft +from sqlframe.base.functions import shiftleft as shiftleft +from sqlframe.base.functions import shiftRight as shiftRight +from sqlframe.base.functions import shiftright as shiftright +from sqlframe.base.functions import shiftRightUnsigned as shiftRightUnsigned +from sqlframe.base.functions import shiftrightunsigned as shiftrightunsigned +from sqlframe.base.functions import shuffle as shuffle +from sqlframe.base.functions import sign as sign +from sqlframe.base.functions import signum as signum +from sqlframe.base.functions import sin as sin +from sqlframe.base.functions import sinh as sinh +from sqlframe.base.functions import size as size +from sqlframe.base.functions import skewness as skewness +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import some as some +from sqlframe.base.functions import sort_array as sort_array +from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import spark_partition_id as spark_partition_id +from sqlframe.base.functions import split as split +from sqlframe.base.functions import split_part as split_part +from sqlframe.base.functions import sqrt as sqrt +from sqlframe.base.functions import stack as stack +from sqlframe.base.functions import startswith as startswith +from sqlframe.base.functions import std as std +from sqlframe.base.functions import stddev as stddev +from sqlframe.base.functions import stddev_pop as stddev_pop +from sqlframe.base.functions import stddev_samp as stddev_samp +from sqlframe.base.functions import str_to_map as str_to_map +from sqlframe.base.functions import struct as struct +from sqlframe.base.functions import substr as substr +from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import substring_index as substring_index +from sqlframe.base.functions import sum as sum +from sqlframe.base.functions import sum_distinct as sum_distinct +from 
sqlframe.base.functions import sumDistinct as sumDistinct +from sqlframe.base.functions import tan as tan +from sqlframe.base.functions import tanh as tanh +from sqlframe.base.functions import timestamp_micros as timestamp_micros +from sqlframe.base.functions import timestamp_millis as timestamp_millis +from sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_binary as to_binary +from sqlframe.base.functions import to_char as to_char +from sqlframe.base.functions import to_csv as to_csv +from sqlframe.base.functions import to_date as to_date +from sqlframe.base.functions import to_json as to_json +from sqlframe.base.functions import to_number as to_number +from sqlframe.base.functions import to_timestamp as to_timestamp +from sqlframe.base.functions import to_timestamp_ntz as to_timestamp_ntz +from sqlframe.base.functions import to_unix_timestamp as to_unix_timestamp +from sqlframe.base.functions import to_utc_timestamp as to_utc_timestamp +from sqlframe.base.functions import to_varchar as to_varchar +from sqlframe.base.functions import toDegrees as toDegrees +from sqlframe.base.functions import toRadians as toRadians +from sqlframe.base.functions import transform as transform +from sqlframe.base.functions import transform_keys as transform_keys +from sqlframe.base.functions import transform_values as transform_values +from sqlframe.base.functions import translate as translate +from sqlframe.base.functions import trim as trim +from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions import try_add as try_add +from sqlframe.base.functions import try_aes_decrypt as try_aes_decrypt +from sqlframe.base.functions import try_avg as try_avg +from sqlframe.base.functions import try_divide as try_divide +from sqlframe.base.functions import try_element_at as try_element_at +from sqlframe.base.functions import try_multiply as try_multiply +from sqlframe.base.functions import try_subtract as 
try_subtract +from sqlframe.base.functions import try_sum as try_sum +from sqlframe.base.functions import try_to_binary as try_to_binary +from sqlframe.base.functions import try_to_number as try_to_number +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof +from sqlframe.base.functions import ucase as ucase +from sqlframe.base.functions import unbase64 as unbase64 +from sqlframe.base.functions import unhex as unhex +from sqlframe.base.functions import unix_date as unix_date +from sqlframe.base.functions import unix_micros as unix_micros +from sqlframe.base.functions import unix_millis as unix_millis +from sqlframe.base.functions import unix_seconds as unix_seconds +from sqlframe.base.functions import unix_timestamp as unix_timestamp +from sqlframe.base.functions import upper as upper +from sqlframe.base.functions import url_decode as url_decode +from sqlframe.base.functions import url_encode as url_encode +from sqlframe.base.functions import user as user +from sqlframe.base.functions import var_pop as var_pop +from sqlframe.base.functions import var_samp as var_samp +from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import version as version +from sqlframe.base.functions import weekday as weekday +from sqlframe.base.functions import weekofyear as weekofyear +from sqlframe.base.functions import when as when +from sqlframe.base.functions import width_bucket as width_bucket +from sqlframe.base.functions import window as window +from sqlframe.base.functions import window_time as window_time +from sqlframe.base.functions import xpath as xpath +from sqlframe.base.functions import xpath_boolean as xpath_boolean +from sqlframe.base.functions import xpath_double as xpath_double +from sqlframe.base.functions import xpath_float as xpath_float +from sqlframe.base.functions import xpath_int as xpath_int +from sqlframe.base.functions import xpath_long as xpath_long 
+from sqlframe.base.functions import xpath_number as xpath_number +from sqlframe.base.functions import xpath_short as xpath_short +from sqlframe.base.functions import xpath_string as xpath_string +from sqlframe.base.functions import xxhash64 as xxhash64 +from sqlframe.base.functions import year as year +from sqlframe.base.functions import years as years +from sqlframe.base.functions import zip_with as zip_with diff --git a/sqlframe/databricks/session.py b/sqlframe/databricks/session.py index 70ed035..a442d2f 100644 --- a/sqlframe/databricks/session.py +++ b/sqlframe/databricks/session.py @@ -59,6 +59,10 @@ def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]: return dict(value) return None + @property + def _is_databricks(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "databricks" diff --git a/sqlframe/duckdb/functions.py b/sqlframe/duckdb/functions.py index 0750666..15de4d2 100644 --- a/sqlframe/duckdb/functions.py +++ b/sqlframe/duckdb/functions.py @@ -15,43 +15,3 @@ and "*" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - any_value_always_ignore_nulls as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - rint_from_round as rint, - kurtosis_from_kurtosis_pop as kurtosis, - collect_set_from_list_distinct as collect_set, - first_always_ignore_nulls as first, - factorial_ensure_int as factorial, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - percentile_approx_without_accuracy as percentile_approx, - rand_no_seed as rand, - base64_from_blob as base64, - decode_from_blob as decode, - format_string_with_pipes as format_string, - overlay_from_substr as overlay, - split_no_limit as split, - arrays_overlap_using_intersect as arrays_overlap, - slice_as_list_slice as slice, - array_join_null_replacement_with_transform as array_join, - element_at_using_brackets as element_at, - array_remove_using_filter as 
array_remove, - array_union_using_list_concat as array_union, - array_min_from_sort as array_min, - array_max_from_sort as array_max, - sequence_from_generate_series as sequence, - try_element_at_zero_based as try_element_at, - day_with_try_to_timestamp as day, - try_to_timestamp_strptime as try_to_timestamp, - _is_string_using_typeof_varchar as _is_string, - array_append_list_append as array_append, - endswith_with_underscore as endswith, - last_day_with_cast as last_day, - regexp_replace_global_option as regexp_replace, - to_timestamp_tz as to_timestamp, -) diff --git a/sqlframe/duckdb/functions.pyi b/sqlframe/duckdb/functions.pyi index 4976b8b..9698d10 100644 --- a/sqlframe/duckdb/functions.pyi +++ b/sqlframe/duckdb/functions.pyi @@ -1,216 +1,219 @@ -from sqlframe.base.function_alternatives import ( # noqa - any_value_always_ignore_nulls as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - rint_from_round as rint, - kurtosis_from_kurtosis_pop as kurtosis, - collect_set_from_list_distinct as collect_set, - first_always_ignore_nulls as first, - factorial_ensure_int as factorial, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - percentile_approx_without_accuracy as percentile_approx, - rand_no_seed as rand, - base64_from_blob as base64, - decode_from_blob as decode, - format_string_with_pipes as format_string, - overlay_from_substr as overlay, - split_no_limit as split, - arrays_overlap_using_intersect as arrays_overlap, - slice_as_list_slice as slice, - array_join_null_replacement_with_transform as array_join, - element_at_using_brackets as element_at, - array_remove_using_filter as array_remove, - array_union_using_list_concat as array_union, - array_min_from_sort as array_min, - array_max_from_sort as array_max, - sequence_from_generate_series as sequence, - try_element_at_zero_based as try_element_at, - to_unix_timestamp_include_default_format as to_unix_timestamp, - regexp_replace_global_option as regexp_replace, - 
to_timestamp_tz as to_timestamp, -) -from sqlframe.base.functions import ( - abs as abs, - acos as acos, - add_months as add_months, - approxCountDistinct as approxCountDistinct, - approx_count_distinct as approx_count_distinct, - array as array, - array_contains as array_contains, - array_distinct as array_distinct, - array_intersect as array_intersect, - array_position as array_position, - array_sort as array_sort, - asc as asc, - asc_nulls_first as asc_nulls_first, - asc_nulls_last as asc_nulls_last, - ascii as ascii, - asin as asin, - atan as atan, - atan2 as atan2, - avg as avg, - bin as bin, - bit_length as bit_length, - bitwiseNOT as bitwiseNOT, - bitwise_not as bitwise_not, - bool_and as bool_and, - bool_or as bool_or, - call_function as call_function, - cbrt as cbrt, - ceil as ceil, - ceiling as ceiling, - char as char, - coalesce as coalesce, - col as col, - collect_list as collect_list, - concat as concat, - concat_ws as concat_ws, - corr as corr, - cos as cos, - cot as cot, - count as count, - countDistinct as countDistinct, - count_distinct as count_distinct, - count_if as count_if, - covar_pop as covar_pop, - covar_samp as covar_samp, - create_map as create_map, - cume_dist as cume_dist, - current_date as current_date, - current_timestamp as current_timestamp, - current_user as current_user, - date_add as date_add, - date_diff as date_diff, - date_format as date_format, - date_sub as date_sub, - date_trunc as date_trunc, - dateadd as dateadd, - datediff as datediff, - dayofmonth as dayofmonth, - dayofweek as dayofweek, - dayofyear as dayofyear, - degrees as degrees, - dense_rank as dense_rank, - desc as desc, - desc_nulls_first as desc_nulls_first, - desc_nulls_last as desc_nulls_last, - encode as encode, - exp as exp, - explode as explode, - expr as expr, - extract as extract, - flatten as flatten, - floor as floor, - from_unixtime as from_unixtime, - get_json_object as get_json_object, - greatest as greatest, - grouping_id as grouping_id, - hash as 
hash, - hex as hex, - hour as hour, - ifnull as ifnull, - input_file_name as input_file_name, - instr as instr, - isnan as isnan, - lag as lag, - last as last, - lcase as lcase, - lead as lead, - least as least, - left as left, - length as length, - levenshtein as levenshtein, - lit as lit, - ln as ln, - locate as locate, - log as log, - log10 as log10, - log2 as log2, - lower as lower, - lpad as lpad, - ltrim as ltrim, - make_date as make_date, - map_from_arrays as map_from_arrays, - max as max, - max_by as max_by, - md5 as md5, - mean as mean, - min as min, - min_by as min_by, - minute as minute, - month as month, - months_between as months_between, - now as now, - nth_value as nth_value, - ntile as ntile, - nullif as nullif, - nvl as nvl, - nvl2 as nvl2, - percent_rank as percent_rank, - percentile as percentile, - position as position, - pow as pow, - power as power, - quarter as quarter, - radians as radians, - rank as rank, - regexp_extract as regexp_extract, - regexp_like as regexp_like, - repeat as repeat, - reverse as reverse, - right as right, - rlike as rlike, - round as round, - row_number as row_number, - rpad as rpad, - rtrim as rtrim, - second as second, - shiftLeft as shiftLeft, - shiftRight as shiftRight, - shiftleft as shiftleft, - shiftright as shiftright, - sign as sign, - signum as signum, - sin as sin, - size as size, - skewness as skewness, - sort_array as sort_array, - soundex as soundex, - split_part as split_part, - sqrt as sqrt, - startswith as startswith, - stddev as stddev, - stddev_pop as stddev_pop, - stddev_samp as stddev_samp, - struct as struct, - substring as substring, - substr as substr, - sum as sum, - sumDistinct as sumDistinct, - sum_distinct as sum_distinct, - tan as tan, - timestamp_seconds as timestamp_seconds, - toDegrees as toDegrees, - toRadians as toRadians, - to_date as to_date, - to_timestamp as to_timestamp, - translate as translate, - trim as trim, - trunc as trunc, - typeof as typeof, - ucase as ucase, - unbase64 
as unbase64, - unhex as unhex, - unix_date as unix_date, - unix_timestamp as unix_timestamp, - upper as upper, - user as user, - var_pop as var_pop, - var_samp as var_samp, - variance as variance, - weekofyear as weekofyear, - when as when, - year as year, -) +from sqlframe.base.functions import abs as abs +from sqlframe.base.functions import acos as acos +from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import any_value as any_value +from sqlframe.base.functions import approx_count_distinct as approx_count_distinct +from sqlframe.base.functions import approxCountDistinct as approxCountDistinct +from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_append as array_append +from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_distinct as array_distinct +from sqlframe.base.functions import array_intersect as array_intersect +from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import array_sort as array_sort +from sqlframe.base.functions import array_union as array_union +from sqlframe.base.functions import arrays_overlap as arrays_overlap +from sqlframe.base.functions import asc as asc +from sqlframe.base.functions import asc_nulls_first as asc_nulls_first +from sqlframe.base.functions import asc_nulls_last as asc_nulls_last +from sqlframe.base.functions import ascii as ascii +from sqlframe.base.functions import asin as asin +from sqlframe.base.functions import atan as atan +from sqlframe.base.functions import atan2 as atan2 +from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from 
sqlframe.base.functions import bin as bin +from sqlframe.base.functions import bit_length as bit_length +from sqlframe.base.functions import bitwise_not as bitwise_not +from sqlframe.base.functions import bitwiseNOT as bitwiseNOT +from sqlframe.base.functions import bool_and as bool_and +from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import call_function as call_function +from sqlframe.base.functions import cbrt as cbrt +from sqlframe.base.functions import ceil as ceil +from sqlframe.base.functions import ceiling as ceiling +from sqlframe.base.functions import char as char +from sqlframe.base.functions import coalesce as coalesce +from sqlframe.base.functions import col as col +from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set +from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws +from sqlframe.base.functions import contains as contains +from sqlframe.base.functions import convert_timezone as convert_timezone +from sqlframe.base.functions import corr as corr +from sqlframe.base.functions import cos as cos +from sqlframe.base.functions import cot as cot +from sqlframe.base.functions import count as count +from sqlframe.base.functions import count_distinct as count_distinct +from sqlframe.base.functions import count_if as count_if +from sqlframe.base.functions import countDistinct as countDistinct +from sqlframe.base.functions import covar_pop as covar_pop +from sqlframe.base.functions import covar_samp as covar_samp +from sqlframe.base.functions import create_map as create_map +from sqlframe.base.functions import cume_dist as cume_dist +from sqlframe.base.functions import current_date as current_date +from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_user as current_user +from sqlframe.base.functions import date_add as 
date_add +from sqlframe.base.functions import date_diff as date_diff +from sqlframe.base.functions import date_format as date_format +from sqlframe.base.functions import date_sub as date_sub +from sqlframe.base.functions import date_trunc as date_trunc +from sqlframe.base.functions import dateadd as dateadd +from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import day as day +from sqlframe.base.functions import dayofmonth as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import decode as decode +from sqlframe.base.functions import degrees as degrees +from sqlframe.base.functions import dense_rank as dense_rank +from sqlframe.base.functions import desc as desc +from sqlframe.base.functions import desc_nulls_first as desc_nulls_first +from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import encode as encode +from sqlframe.base.functions import endswith as endswith +from sqlframe.base.functions import exp as exp +from sqlframe.base.functions import explode as explode +from sqlframe.base.functions import expm1 as expm1 +from sqlframe.base.functions import expr as expr +from sqlframe.base.functions import extract as extract +from sqlframe.base.functions import factorial as factorial +from sqlframe.base.functions import first as first +from sqlframe.base.functions import flatten as flatten +from sqlframe.base.functions import floor as floor +from sqlframe.base.functions import format_string as format_string +from sqlframe.base.functions import from_unixtime as from_unixtime +from sqlframe.base.functions import get_json_object as get_json_object +from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import grouping_id as grouping_id 
+from sqlframe.base.functions import hash as hash +from sqlframe.base.functions import hex as hex +from sqlframe.base.functions import hour as hour +from sqlframe.base.functions import ifnull as ifnull +from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr +from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnull as isnull +from sqlframe.base.functions import kurtosis as kurtosis +from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last as last +from sqlframe.base.functions import last_day as last_day +from sqlframe.base.functions import lcase as lcase +from sqlframe.base.functions import lead as lead +from sqlframe.base.functions import least as least +from sqlframe.base.functions import left as left +from sqlframe.base.functions import length as length +from sqlframe.base.functions import levenshtein as levenshtein +from sqlframe.base.functions import lit as lit +from sqlframe.base.functions import ln as ln +from sqlframe.base.functions import locate as locate +from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p +from sqlframe.base.functions import log2 as log2 +from sqlframe.base.functions import log10 as log10 +from sqlframe.base.functions import lower as lower +from sqlframe.base.functions import lpad as lpad +from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date +from sqlframe.base.functions import map_from_arrays as map_from_arrays +from sqlframe.base.functions import max as max +from sqlframe.base.functions import max_by as max_by +from sqlframe.base.functions import md5 as md5 +from sqlframe.base.functions import mean as mean +from sqlframe.base.functions import median as median +from sqlframe.base.functions import min as min +from sqlframe.base.functions import min_by as min_by +from sqlframe.base.functions import 
minute as minute +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import now as now +from sqlframe.base.functions import nth_value as nth_value +from sqlframe.base.functions import ntile as ntile +from sqlframe.base.functions import nullif as nullif +from sqlframe.base.functions import nvl as nvl +from sqlframe.base.functions import nvl2 as nvl2 +from sqlframe.base.functions import overlay as overlay +from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile as percentile +from sqlframe.base.functions import percentile_approx as percentile_approx +from sqlframe.base.functions import position as position +from sqlframe.base.functions import pow as pow +from sqlframe.base.functions import power as power +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import rand as rand +from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import regexp_extract as regexp_extract +from sqlframe.base.functions import regexp_like as regexp_like +from sqlframe.base.functions import regexp_replace as regexp_replace +from sqlframe.base.functions import repeat as repeat +from sqlframe.base.functions import reverse as reverse +from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint +from sqlframe.base.functions import rlike as rlike +from sqlframe.base.functions import round as round +from sqlframe.base.functions import row_number as row_number +from sqlframe.base.functions import rpad as rpad +from sqlframe.base.functions import rtrim as rtrim +from sqlframe.base.functions import second as second +from sqlframe.base.functions import sequence as sequence +from sqlframe.base.functions import shiftLeft as shiftLeft +from 
sqlframe.base.functions import shiftleft as shiftleft +from sqlframe.base.functions import shiftRight as shiftRight +from sqlframe.base.functions import shiftright as shiftright +from sqlframe.base.functions import sign as sign +from sqlframe.base.functions import signum as signum +from sqlframe.base.functions import sin as sin +from sqlframe.base.functions import size as size +from sqlframe.base.functions import skewness as skewness +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import sort_array as sort_array +from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import split as split +from sqlframe.base.functions import split_part as split_part +from sqlframe.base.functions import sqrt as sqrt +from sqlframe.base.functions import startswith as startswith +from sqlframe.base.functions import stddev as stddev +from sqlframe.base.functions import stddev_pop as stddev_pop +from sqlframe.base.functions import stddev_samp as stddev_samp +from sqlframe.base.functions import struct as struct +from sqlframe.base.functions import substr as substr +from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import sum as sum +from sqlframe.base.functions import sum_distinct as sum_distinct +from sqlframe.base.functions import sumDistinct as sumDistinct +from sqlframe.base.functions import tan as tan +from sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_date as to_date +from sqlframe.base.functions import to_timestamp as to_timestamp +from sqlframe.base.functions import to_unix_timestamp as to_unix_timestamp +from sqlframe.base.functions import toDegrees as toDegrees +from sqlframe.base.functions import toRadians as toRadians +from sqlframe.base.functions import translate as translate +from sqlframe.base.functions import trim as trim +from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions 
import try_element_at as try_element_at +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof +from sqlframe.base.functions import ucase as ucase +from sqlframe.base.functions import unbase64 as unbase64 +from sqlframe.base.functions import unhex as unhex +from sqlframe.base.functions import unix_date as unix_date +from sqlframe.base.functions import unix_timestamp as unix_timestamp +from sqlframe.base.functions import upper as upper +from sqlframe.base.functions import user as user +from sqlframe.base.functions import var_pop as var_pop +from sqlframe.base.functions import var_samp as var_samp +from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import weekofyear as weekofyear +from sqlframe.base.functions import when as when +from sqlframe.base.functions import year as year diff --git a/sqlframe/duckdb/session.py b/sqlframe/duckdb/session.py index a7ce1d3..abdce5a 100644 --- a/sqlframe/duckdb/session.py +++ b/sqlframe/duckdb/session.py @@ -71,6 +71,10 @@ def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]: def _execute(self, sql: str) -> None: self._last_result = self._cur.execute(sql) # type: ignore + @property + def _is_duckdb(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "duckdb" diff --git a/sqlframe/postgres/functions.py b/sqlframe/postgres/functions.py index 673fb4a..97eaa78 100644 --- a/sqlframe/postgres/functions.py +++ b/sqlframe/postgres/functions.py @@ -1,7 +1,7 @@ import inspect import sys -import sqlframe.base.functions +import sqlframe.base.functions # noqa module = sys.modules["sqlframe.base.functions"] globals().update( @@ -13,62 +13,3 @@ and "*" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - 
rint_from_round as rint, - collect_set_from_list_distinct as collect_set, - isnan_using_equal as isnan, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - rand_no_seed as rand, - round_cast_as_numeric as round, - year_from_extract as year, - quarter_from_extract as quarter, - month_from_extract as month, - dayofweek_from_extract_with_isodow as dayofweek, - dayofmonth_from_extract_with_day as dayofmonth, - dayofyear_from_extract_doy as dayofyear, - hour_from_extract as hour, - minute_from_extract as minute, - second_from_extract as second, - weekofyear_from_extract_as_week as weekofyear, - make_date_casted_as_integer as make_date, - date_add_by_multiplication as date_add, - date_sub_by_multiplication as date_sub, - date_diff_with_subtraction as date_diff, - date_diff_with_subtraction as datediff, - add_months_by_multiplication as add_months, - months_between_from_age_and_extract as months_between, - from_unixtime_from_timestamp as from_unixtime, - unix_timestamp_from_extract as unix_timestamp, - base64_from_blob as base64, - bas64_from_encode as base64, - unbase64_from_decode as unbase64, - decode_from_convert_from as decode, - encode_from_convert_to as encode, - format_number_from_to_char as format_number, - format_string_with_format as format_string, - split_from_regex_split_to_array as split, - array_contains_any as array_contains, - slice_with_brackets as slice, - element_at_using_brackets as element_at, - get_json_object_using_arrow_op as get_json_object, - array_min_from_subquery as array_min, - array_max_from_subquery as array_max, - left_cast_len as left, - right_cast_len as right, - position_cast_start as position, - try_element_at_zero_based as try_element_at, - try_to_timestamp_pgtemp as try_to_timestamp, - typeof_pg_typeof as typeof, - _is_string_using_typeof_char_varying as _is_string, - endswith_using_like as endswith, - last_day_with_cast as last_day, - regexp_replace_global_option as regexp_replace, - to_timestamp_with_time_zone as 
to_timestamp, -) diff --git a/sqlframe/postgres/functions.pyi b/sqlframe/postgres/functions.pyi index 7724db7..a84b2f6 100644 --- a/sqlframe/postgres/functions.pyi +++ b/sqlframe/postgres/functions.pyi @@ -1,196 +1,197 @@ -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - rint_from_round as rint, - collect_set_from_list_distinct as collect_set, - isnan_using_equal as isnan, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - rand_no_seed as rand, - round_cast_as_numeric as round, - year_from_extract as year, - quarter_from_extract as quarter, - month_from_extract as month, - dayofweek_from_extract_with_isodow as dayofweek, - dayofmonth_from_extract_with_day as dayofmonth, - dayofyear_from_extract_doy as dayofyear, - hour_from_extract as hour, - minute_from_extract as minute, - second_from_extract as second, - weekofyear_from_extract_as_week as weekofyear, - make_date_casted_as_integer as make_date, - date_add_by_multiplication as date_add, - date_sub_by_multiplication as date_sub, - date_diff_with_subtraction as date_diff, - date_diff_with_subtraction as datediff, - add_months_by_multiplication as add_months, - months_between_from_age_and_extract as months_between, - from_unixtime_from_timestamp as from_unixtime, - unix_timestamp_from_extract as unix_timestamp, - base64_from_blob as base64, - bas64_from_encode as base64, - unbase64_from_decode as unbase64, - decode_from_convert_from as decode, - encode_from_convert_to as encode, - format_number_from_to_char as format_number, - format_string_with_format as format_string, - split_from_regex_split_to_array as split, - array_contains_any as array_contains, - slice_with_brackets as slice, - element_at_using_brackets as element_at, - get_json_object_using_arrow_op as get_json_object, - array_min_from_subquery as array_min, - array_max_from_subquery as array_max, - left_cast_len as 
left, - right_cast_len as right, - position_cast_start as position, - try_element_at_zero_based as try_element_at, - regexp_replace_global_option as regexp_replace, -) -from sqlframe.base.functions import ( - abs as abs, - acos as acos, - acosh as acosh, - array as array, - array_join as array_join, - array_position as array_position, - array_remove as array_remove, - arrays_overlap as arrays_overlap, - asc as asc, - asc_nulls_first as asc_nulls_first, - asc_nulls_last as asc_nulls_last, - ascii as ascii, - asin as asin, - asinh as asinh, - atan as atan, - atan2 as atan2, - atanh as atanh, - avg as avg, - bit_length as bit_length, - bitwiseNOT as bitwiseNOT, - bitwise_not as bitwise_not, - bool_and as bool_and, - bool_or as bool_or, - call_function as call_function, - cbrt as cbrt, - ceil as ceil, - ceiling as ceiling, - char as char, - coalesce as coalesce, - col as col, - collect_list as collect_list, - concat as concat, - concat_ws as concat_ws, - corr as corr, - cos as cos, - cosh as cosh, - cot as cot, - count as count, - countDistinct as countDistinct, - count_distinct as count_distinct, - covar_pop as covar_pop, - covar_samp as covar_samp, - cume_dist as cume_dist, - current_date as current_date, - current_timestamp as current_timestamp, - current_user as current_user, - date_format as date_format, - date_trunc as date_trunc, - dateadd as dateadd, - degrees as degrees, - dense_rank as dense_rank, - desc as desc, - desc_nulls_first as desc_nulls_first, - desc_nulls_last as desc_nulls_last, - exp as exp, - explode as explode, - expr as expr, - extract as extract, - factorial as factorial, - floor as floor, - greatest as greatest, - ifnull as ifnull, - initcap as initcap, - input_file_name as input_file_name, - instr as instr, - lag as lag, - lcase as lcase, - lead as lead, - least as least, - length as length, - levenshtein as levenshtein, - lit as lit, - ln as ln, - locate as locate, - log as log, - log10 as log10, - log2 as log2, - lower as lower, - lpad as 
lpad, - ltrim as ltrim, - max as max, - md5 as md5, - mean as mean, - min as min, - now as now, - nth_value as nth_value, - ntile as ntile, - nullif as nullif, - nvl as nvl, - nvl2 as nvl2, - octet_length as octet_length, - overlay as overlay, - percent_rank as percent_rank, - percentile as percentile, - pow as pow, - power as power, - radians as radians, - rank as rank, - regexp_like as regexp_like, - repeat as repeat, - reverse as reverse, - rlike as rlike, - row_number as row_number, - rpad as rpad, - rtrim as rtrim, - shiftLeft as shiftLeft, - shiftRight as shiftRight, - shiftleft as shiftleft, - shiftright as shiftright, - sign as sign, - signum as signum, - sin as sin, - sinh as sinh, - size as size, - soundex as soundex, - sqrt as sqrt, - startswith as startswith, - stddev as stddev, - stddev_pop as stddev_pop, - stddev_samp as stddev_samp, - substring as substring, - sum as sum, - sumDistinct as sumDistinct, - sum_distinct as sum_distinct, - tan as tan, - tanh as tanh, - timestamp_seconds as timestamp_seconds, - toDegrees as toDegrees, - toRadians as toRadians, - to_date as to_date, - to_number as to_number, - to_timestamp as to_timestamp, - translate as translate, - trim as trim, - trunc as trunc, - ucase as ucase, - unix_date as unix_date, - upper as upper, - user as user, - var_pop as var_pop, - var_samp as var_samp, - variance as variance, - when as when, -) +from sqlframe.base.functions import abs as abs +from sqlframe.base.functions import acos as acos +from sqlframe.base.functions import acosh as acosh +from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import any_value as any_value +from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_append as array_append +from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from 
sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import arrays_overlap as arrays_overlap +from sqlframe.base.functions import asc as asc +from sqlframe.base.functions import asc_nulls_first as asc_nulls_first +from sqlframe.base.functions import asc_nulls_last as asc_nulls_last +from sqlframe.base.functions import ascii as ascii +from sqlframe.base.functions import asin as asin +from sqlframe.base.functions import asinh as asinh +from sqlframe.base.functions import atan as atan +from sqlframe.base.functions import atan2 as atan2 +from sqlframe.base.functions import atanh as atanh +from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from sqlframe.base.functions import bit_length as bit_length +from sqlframe.base.functions import bitwise_not as bitwise_not +from sqlframe.base.functions import bitwiseNOT as bitwiseNOT +from sqlframe.base.functions import bool_and as bool_and +from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import call_function as call_function +from sqlframe.base.functions import cbrt as cbrt +from sqlframe.base.functions import ceil as ceil +from sqlframe.base.functions import ceiling as ceiling +from sqlframe.base.functions import char as char +from sqlframe.base.functions import coalesce as coalesce +from sqlframe.base.functions import col as col +from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set +from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws +from sqlframe.base.functions import corr as corr +from sqlframe.base.functions import cos as cos +from sqlframe.base.functions import cosh as cosh +from sqlframe.base.functions import cot as cot +from 
sqlframe.base.functions import count as count +from sqlframe.base.functions import count_distinct as count_distinct +from sqlframe.base.functions import countDistinct as countDistinct +from sqlframe.base.functions import covar_pop as covar_pop +from sqlframe.base.functions import covar_samp as covar_samp +from sqlframe.base.functions import cume_dist as cume_dist +from sqlframe.base.functions import current_date as current_date +from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_user as current_user +from sqlframe.base.functions import date_add as date_add +from sqlframe.base.functions import date_diff as date_diff +from sqlframe.base.functions import date_format as date_format +from sqlframe.base.functions import date_sub as date_sub +from sqlframe.base.functions import date_trunc as date_trunc +from sqlframe.base.functions import dateadd as dateadd +from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import dayofmonth as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import decode as decode +from sqlframe.base.functions import degrees as degrees +from sqlframe.base.functions import dense_rank as dense_rank +from sqlframe.base.functions import desc as desc +from sqlframe.base.functions import desc_nulls_first as desc_nulls_first +from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import encode as encode +from sqlframe.base.functions import endswith as endswith +from sqlframe.base.functions import exp as exp +from sqlframe.base.functions import explode as explode +from sqlframe.base.functions import expm1 as expm1 +from sqlframe.base.functions import expr as expr +from sqlframe.base.functions 
import extract as extract +from sqlframe.base.functions import factorial as factorial +from sqlframe.base.functions import floor as floor +from sqlframe.base.functions import format_number as format_number +from sqlframe.base.functions import format_string as format_string +from sqlframe.base.functions import from_unixtime as from_unixtime +from sqlframe.base.functions import get_json_object as get_json_object +from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import hour as hour +from sqlframe.base.functions import ifnull as ifnull +from sqlframe.base.functions import initcap as initcap +from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr +from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnull as isnull +from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last_day as last_day +from sqlframe.base.functions import lcase as lcase +from sqlframe.base.functions import lead as lead +from sqlframe.base.functions import least as least +from sqlframe.base.functions import left as left +from sqlframe.base.functions import length as length +from sqlframe.base.functions import levenshtein as levenshtein +from sqlframe.base.functions import lit as lit +from sqlframe.base.functions import ln as ln +from sqlframe.base.functions import locate as locate +from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p +from sqlframe.base.functions import log2 as log2 +from sqlframe.base.functions import log10 as log10 +from sqlframe.base.functions import lower as lower +from sqlframe.base.functions import lpad as lpad +from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date +from sqlframe.base.functions import max as max +from sqlframe.base.functions import md5 as md5 +from sqlframe.base.functions import mean as mean 
+from sqlframe.base.functions import median as median +from sqlframe.base.functions import min as min +from sqlframe.base.functions import minute as minute +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import now as now +from sqlframe.base.functions import nth_value as nth_value +from sqlframe.base.functions import ntile as ntile +from sqlframe.base.functions import nullif as nullif +from sqlframe.base.functions import nvl as nvl +from sqlframe.base.functions import nvl2 as nvl2 +from sqlframe.base.functions import octet_length as octet_length +from sqlframe.base.functions import overlay as overlay +from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile as percentile +from sqlframe.base.functions import position as position +from sqlframe.base.functions import pow as pow +from sqlframe.base.functions import power as power +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import rand as rand +from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import regexp_like as regexp_like +from sqlframe.base.functions import regexp_replace as regexp_replace +from sqlframe.base.functions import repeat as repeat +from sqlframe.base.functions import reverse as reverse +from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint +from sqlframe.base.functions import rlike as rlike +from sqlframe.base.functions import round as round +from sqlframe.base.functions import row_number as row_number +from sqlframe.base.functions import rpad as rpad +from sqlframe.base.functions import rtrim as rtrim +from sqlframe.base.functions import second as second +from sqlframe.base.functions import shiftLeft as shiftLeft +from 
sqlframe.base.functions import shiftleft as shiftleft +from sqlframe.base.functions import shiftRight as shiftRight +from sqlframe.base.functions import shiftright as shiftright +from sqlframe.base.functions import sign as sign +from sqlframe.base.functions import signum as signum +from sqlframe.base.functions import sin as sin +from sqlframe.base.functions import sinh as sinh +from sqlframe.base.functions import size as size +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import split as split +from sqlframe.base.functions import sqrt as sqrt +from sqlframe.base.functions import startswith as startswith +from sqlframe.base.functions import stddev as stddev +from sqlframe.base.functions import stddev_pop as stddev_pop +from sqlframe.base.functions import stddev_samp as stddev_samp +from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import sum as sum +from sqlframe.base.functions import sum_distinct as sum_distinct +from sqlframe.base.functions import sumDistinct as sumDistinct +from sqlframe.base.functions import tan as tan +from sqlframe.base.functions import tanh as tanh +from sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_date as to_date +from sqlframe.base.functions import to_number as to_number +from sqlframe.base.functions import to_timestamp as to_timestamp +from sqlframe.base.functions import toDegrees as toDegrees +from sqlframe.base.functions import toRadians as toRadians +from sqlframe.base.functions import translate as translate +from sqlframe.base.functions import trim as trim +from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions import try_element_at as try_element_at +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof +from sqlframe.base.functions import ucase 
as ucase +from sqlframe.base.functions import unbase64 as unbase64 +from sqlframe.base.functions import unix_date as unix_date +from sqlframe.base.functions import unix_timestamp as unix_timestamp +from sqlframe.base.functions import upper as upper +from sqlframe.base.functions import user as user +from sqlframe.base.functions import var_pop as var_pop +from sqlframe.base.functions import var_samp as var_samp +from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import weekofyear as weekofyear +from sqlframe.base.functions import when as when +from sqlframe.base.functions import year as year diff --git a/sqlframe/postgres/session.py b/sqlframe/postgres/session.py index f3b26df..cf3dcc7 100644 --- a/sqlframe/postgres/session.py +++ b/sqlframe/postgres/session.py @@ -71,6 +71,10 @@ def _collect( return [] raise e + @property + def _is_postgres(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "postgres" diff --git a/sqlframe/redshift/functions.py b/sqlframe/redshift/functions.py index 5d24a8a..0a31c36 100644 --- a/sqlframe/redshift/functions.py +++ b/sqlframe/redshift/functions.py @@ -1,7 +1,7 @@ import inspect import sys -import sqlframe.base.functions +import sqlframe.base.functions # noqa module = sys.modules["sqlframe.base.functions"] globals().update( @@ -13,6 +13,3 @@ and "*" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import e_literal as e # noqa diff --git a/sqlframe/redshift/session.py b/sqlframe/redshift/session.py index ed8dd19..3103988 100644 --- a/sqlframe/redshift/session.py +++ b/sqlframe/redshift/session.py @@ -41,6 +41,10 @@ def __init__(self, conn: t.Optional[RedshiftConnection] = None): if not hasattr(self, "_conn"): super().__init__(conn) + @property + def _is_redshift(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "redshift" diff --git a/sqlframe/snowflake/functions.py 
b/sqlframe/snowflake/functions.py index 7fa1629..15e3f9a 100644 --- a/sqlframe/snowflake/functions.py +++ b/sqlframe/snowflake/functions.py @@ -1,7 +1,7 @@ import inspect import sys -import sqlframe.base.functions +import sqlframe.base.functions # noqa module = sys.modules["sqlframe.base.functions"] globals().update( @@ -13,57 +13,3 @@ and "*" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - rint_from_round as rint, - bitwise_not_from_bitnot as bitwise_not, - skewness_from_skew as skewness, - isnan_using_equal as isnan, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - percentile_approx_without_accuracy_and_max_array as percentile_approx, - bround_using_half_even as bround, - shiftleft_from_bitshiftleft as shiftleft, - shiftright_from_bitshiftright as shiftright, - struct_with_eq as struct, - make_date_date_from_parts as make_date, - date_add_no_date_sub as date_add, - date_add_no_date_sub as dateadd, - date_sub_by_date_add as date_sub, - add_months_using_func as add_months, - months_between_cast_as_date_cast_roundoff as months_between, - last_day_with_cast as last_day, - from_unixtime_from_timestamp as from_unixtime, - unix_timestamp_from_extract as unix_timestamp, - base64_from_base64_encode as base64, - unbase64_from_base64_decode_string as unbase64, - format_number_from_to_char as format_number, - overlay_from_substr as overlay, - levenshtein_edit_distance as levenshtein, - split_with_split as split, - regexp_extract_coalesce_empty_str as regexp_extract, - hex_using_encode as hex, - unhex_hex_decode_str as unhex, - create_map_with_cast as create_map, - array_contains_cast_variant as array_contains, - arrays_overlap_as_plural as arrays_overlap, - slice_as_array_slice as slice, - array_join_no_null_replacement as array_join, - array_position_cast_variant_and_flip as 
array_position, - element_at_using_brackets as element_at, - array_intersect_using_intersection as array_intersect, - array_union_using_array_concat as array_union, - sort_array_using_array_sort as sort_array, - flatten_using_array_flatten as flatten, - map_concat_using_map_cat as map_concat, - sequence_from_array_generate_range as sequence, - to_number_using_to_double as to_number, - typeof_from_variant as typeof, - to_date_time_format as to_date, - _is_integer_using_func as _is_integer, -) diff --git a/sqlframe/snowflake/functions.pyi b/sqlframe/snowflake/functions.pyi index dc903e5..6369b0b 100644 --- a/sqlframe/snowflake/functions.pyi +++ b/sqlframe/snowflake/functions.pyi @@ -1,220 +1,224 @@ -from sqlframe.base.function_alternatives import ( # noqa - any_value_ignore_nulls_not_supported as any_value, - e_literal as e, - expm1_from_exp as expm1, - log1p_from_log as log1p, - rint_from_round as rint, - bitwise_not_from_bitnot as bitwise_not, - skewness_from_skew as skewness, - isnan_using_equal as isnan, - isnull_using_equal as isnull, - nanvl_as_case as nanvl, - percentile_approx_without_accuracy_and_max_array as percentile_approx, - bround_using_half_even as bround, - shiftleft_from_bitshiftleft as shiftleft, - shiftright_from_bitshiftright as shiftright, - struct_with_eq as struct, - make_date_date_from_parts as make_date, - date_add_no_date_sub as date_add, - date_add_no_date_sub as dateadd, - date_sub_by_date_add as date_sub, - add_months_using_func as add_months, - months_between_cast_as_date_cast_roundoff as months_between, - last_day_with_cast as last_day, - from_unixtime_from_timestamp as from_unixtime, - unix_timestamp_from_extract as unix_timestamp, - base64_from_base64_encode as base64, - unbase64_from_base64_decode_string as unbase64, - format_number_from_to_char as format_number, - overlay_from_substr as overlay, - levenshtein_edit_distance as levenshtein, - split_with_split as split, - regexp_extract_coalesce_empty_str as regexp_extract, - 
hex_using_encode as hex, - unhex_hex_decode_str as unhex, - create_map_with_cast as create_map, - array_contains_cast_variant as array_contains, - arrays_overlap_as_plural as arrays_overlap, - slice_as_array_slice as slice, - array_join_no_null_replacement as array_join, - array_position_cast_variant_and_flip as array_position, - element_at_using_brackets as element_at, - array_intersect_using_intersection as array_intersect, - array_union_using_array_concat as array_union, - sort_array_using_array_sort as sort_array, - flatten_using_array_flatten as flatten, - map_concat_using_map_cat as map_concat, - sequence_from_array_generate_range as sequence, - to_number_using_to_double as to_number, -) -from sqlframe.base.functions import ( - abs as abs, - acos as acos, - acosh as acosh, - approxCountDistinct as approxCountDistinct, - approx_count_distinct as approx_count_distinct, - array as array, - array_distinct as array_distinct, - array_except as array_except, - array_max as array_max, - array_min as array_min, - array_remove as array_remove, - array_sort as array_sort, - asc as asc, - asc_nulls_first as asc_nulls_first, - asc_nulls_last as asc_nulls_last, - ascii as ascii, - asin as asin, - asinh as asinh, - atan as atan, - atan2 as atan2, - atanh as atanh, - avg as avg, - bit_length as bit_length, - bitwiseNOT as bitwiseNOT, - bool_and as bool_and, - bool_or as bool_or, - call_function as call_function, - cbrt as cbrt, - ceil as ceil, - ceiling as ceiling, - char as char, - coalesce as coalesce, - col as col, - collect_list as collect_list, - collect_set as collect_set, - concat as concat, - concat_ws as concat_ws, - corr as corr, - cos as cos, - cosh as cosh, - cot as cot, - count as count, - countDistinct as countDistinct, - count_distinct as count_distinct, - count_if as count_if, - covar_pop as covar_pop, - covar_samp as covar_samp, - cume_dist as cume_dist, - current_date as current_date, - current_timestamp as current_timestamp, - current_user as current_user, 
- date_diff as date_diff, - date_format as date_format, - date_trunc as date_trunc, - datediff as datediff, - dayofmonth as dayofmonth, - dayofweek as dayofweek, - dayofyear as dayofyear, - degrees as degrees, - dense_rank as dense_rank, - desc as desc, - desc_nulls_first as desc_nulls_first, - desc_nulls_last as desc_nulls_last, - exp as exp, - explode as explode, - expr as expr, - extract as extract, - factorial as factorial, - floor as floor, - greatest as greatest, - grouping_id as grouping_id, - hash as hash, - hour as hour, - ifnull as ifnull, - initcap as initcap, - input_file_name as input_file_name, - instr as instr, - kurtosis as kurtosis, - lag as lag, - lcase as lcase, - lead as lead, - least as least, - left as left, - length as length, - lit as lit, - ln as ln, - locate as locate, - log as log, - log10 as log10, - log2 as log2, - lower as lower, - lpad as lpad, - ltrim as ltrim, - map_keys as map_keys, - max as max, - max_by as max_by, - md5 as md5, - mean as mean, - min as min, - min_by as min_by, - minute as minute, - month as month, - next_day as next_day, - now as now, - nth_value as nth_value, - ntile as ntile, - nullif as nullif, - nvl as nvl, - nvl2 as nvl2, - octet_length as octet_length, - percent_rank as percent_rank, - percentile as percentile, - posexplode as posexplode, - position as position, - pow as pow, - power as power, - quarter as quarter, - radians as radians, - rand as rand, - rank as rank, - regexp_replace as regexp_replace, - repeat as repeat, - right as right, - round as round, - row_number as row_number, - rpad as rpad, - rtrim as rtrim, - second as second, - sha as sha, - sha1 as sha1, - sha2 as sha2, - shiftLeft as shiftLeft, - shiftRight as shiftRight, - sign as sign, - signum as signum, - sin as sin, - sinh as sinh, - size as size, - soundex as soundex, - split_part as split_part, - sqrt as sqrt, - startswith as startswith, - stddev as stddev, - stddev_pop as stddev_pop, - stddev_samp as stddev_samp, - substring as 
substring, - substr as substr, - sum as sum, - sumDistinct as sumDistinct, - sum_distinct as sum_distinct, - tan as tan, - tanh as tanh, - timestamp_seconds as timestamp_seconds, - toDegrees as toDegrees, - toRadians as toRadians, - to_date as to_date, - to_timestamp as to_timestamp, - translate as translate, - trim as trim, - trunc as trunc, - ucase as ucase, - upper as upper, - user as user, - var_pop as var_pop, - var_samp as var_samp, - variance as variance, - weekofyear as weekofyear, - when as when, - year as year, -) +from sqlframe.base.functions import abs as abs +from sqlframe.base.functions import acos as acos +from sqlframe.base.functions import acosh as acosh +from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import any_value as any_value +from sqlframe.base.functions import approx_count_distinct as approx_count_distinct +from sqlframe.base.functions import approxCountDistinct as approxCountDistinct +from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_append as array_append +from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_distinct as array_distinct +from sqlframe.base.functions import array_except as array_except +from sqlframe.base.functions import array_intersect as array_intersect +from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import array_sort as array_sort +from sqlframe.base.functions import array_union as array_union +from sqlframe.base.functions import arrays_overlap as arrays_overlap +from sqlframe.base.functions import asc as asc +from sqlframe.base.functions import asc_nulls_first as asc_nulls_first 
+from sqlframe.base.functions import asc_nulls_last as asc_nulls_last +from sqlframe.base.functions import ascii as ascii +from sqlframe.base.functions import asin as asin +from sqlframe.base.functions import asinh as asinh +from sqlframe.base.functions import atan as atan +from sqlframe.base.functions import atan2 as atan2 +from sqlframe.base.functions import atanh as atanh +from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from sqlframe.base.functions import bit_length as bit_length +from sqlframe.base.functions import bitwise_not as bitwise_not +from sqlframe.base.functions import bitwiseNOT as bitwiseNOT +from sqlframe.base.functions import bool_and as bool_and +from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import bround as bround +from sqlframe.base.functions import call_function as call_function +from sqlframe.base.functions import cbrt as cbrt +from sqlframe.base.functions import ceil as ceil +from sqlframe.base.functions import ceiling as ceiling +from sqlframe.base.functions import char as char +from sqlframe.base.functions import coalesce as coalesce +from sqlframe.base.functions import col as col +from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set +from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws +from sqlframe.base.functions import contains as contains +from sqlframe.base.functions import convert_timezone as convert_timezone +from sqlframe.base.functions import corr as corr +from sqlframe.base.functions import cos as cos +from sqlframe.base.functions import cosh as cosh +from sqlframe.base.functions import cot as cot +from sqlframe.base.functions import count as count +from sqlframe.base.functions import count_distinct as count_distinct +from sqlframe.base.functions import count_if as count_if +from sqlframe.base.functions 
import countDistinct as countDistinct +from sqlframe.base.functions import covar_pop as covar_pop +from sqlframe.base.functions import covar_samp as covar_samp +from sqlframe.base.functions import create_map as create_map +from sqlframe.base.functions import cume_dist as cume_dist +from sqlframe.base.functions import current_date as current_date +from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_user as current_user +from sqlframe.base.functions import date_add as date_add +from sqlframe.base.functions import date_diff as date_diff +from sqlframe.base.functions import date_format as date_format +from sqlframe.base.functions import date_sub as date_sub +from sqlframe.base.functions import date_trunc as date_trunc +from sqlframe.base.functions import dateadd as dateadd +from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import dayofmonth as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import degrees as degrees +from sqlframe.base.functions import dense_rank as dense_rank +from sqlframe.base.functions import desc as desc +from sqlframe.base.functions import desc_nulls_first as desc_nulls_first +from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import endswith as endswith +from sqlframe.base.functions import exp as exp +from sqlframe.base.functions import explode as explode +from sqlframe.base.functions import expm1 as expm1 +from sqlframe.base.functions import expr as expr +from sqlframe.base.functions import extract as extract +from sqlframe.base.functions import factorial as factorial +from sqlframe.base.functions import flatten as flatten +from sqlframe.base.functions import floor as floor 
+from sqlframe.base.functions import format_number as format_number +from sqlframe.base.functions import from_unixtime as from_unixtime +from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import grouping_id as grouping_id +from sqlframe.base.functions import hash as hash +from sqlframe.base.functions import hex as hex +from sqlframe.base.functions import hour as hour +from sqlframe.base.functions import ifnull as ifnull +from sqlframe.base.functions import initcap as initcap +from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr +from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnull as isnull +from sqlframe.base.functions import kurtosis as kurtosis +from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last_day as last_day +from sqlframe.base.functions import lcase as lcase +from sqlframe.base.functions import lead as lead +from sqlframe.base.functions import least as least +from sqlframe.base.functions import left as left +from sqlframe.base.functions import length as length +from sqlframe.base.functions import levenshtein as levenshtein +from sqlframe.base.functions import lit as lit +from sqlframe.base.functions import ln as ln +from sqlframe.base.functions import locate as locate +from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p +from sqlframe.base.functions import log2 as log2 +from sqlframe.base.functions import log10 as log10 +from sqlframe.base.functions import lower as lower +from sqlframe.base.functions import lpad as lpad +from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date +from sqlframe.base.functions import map_concat as map_concat +from sqlframe.base.functions import map_keys as map_keys +from sqlframe.base.functions import max as max +from sqlframe.base.functions 
import max_by as max_by +from sqlframe.base.functions import md5 as md5 +from sqlframe.base.functions import mean as mean +from sqlframe.base.functions import median as median +from sqlframe.base.functions import min as min +from sqlframe.base.functions import min_by as min_by +from sqlframe.base.functions import minute as minute +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import next_day as next_day +from sqlframe.base.functions import now as now +from sqlframe.base.functions import nth_value as nth_value +from sqlframe.base.functions import ntile as ntile +from sqlframe.base.functions import nullif as nullif +from sqlframe.base.functions import nvl as nvl +from sqlframe.base.functions import nvl2 as nvl2 +from sqlframe.base.functions import octet_length as octet_length +from sqlframe.base.functions import overlay as overlay +from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile as percentile +from sqlframe.base.functions import percentile_approx as percentile_approx +from sqlframe.base.functions import posexplode as posexplode +from sqlframe.base.functions import position as position +from sqlframe.base.functions import pow as pow +from sqlframe.base.functions import power as power +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import rand as rand +from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import regexp_extract as regexp_extract +from sqlframe.base.functions import regexp_replace as regexp_replace +from sqlframe.base.functions import repeat as repeat +from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint +from sqlframe.base.functions import round as round +from 
sqlframe.base.functions import row_number as row_number +from sqlframe.base.functions import rpad as rpad +from sqlframe.base.functions import rtrim as rtrim +from sqlframe.base.functions import second as second +from sqlframe.base.functions import sequence as sequence +from sqlframe.base.functions import sha as sha +from sqlframe.base.functions import sha1 as sha1 +from sqlframe.base.functions import sha2 as sha2 +from sqlframe.base.functions import shiftLeft as shiftLeft +from sqlframe.base.functions import shiftleft as shiftleft +from sqlframe.base.functions import shiftRight as shiftRight +from sqlframe.base.functions import shiftright as shiftright +from sqlframe.base.functions import sign as sign +from sqlframe.base.functions import signum as signum +from sqlframe.base.functions import sin as sin +from sqlframe.base.functions import sinh as sinh +from sqlframe.base.functions import size as size +from sqlframe.base.functions import skewness as skewness +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import sort_array as sort_array +from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import split as split +from sqlframe.base.functions import split_part as split_part +from sqlframe.base.functions import sqrt as sqrt +from sqlframe.base.functions import startswith as startswith +from sqlframe.base.functions import stddev as stddev +from sqlframe.base.functions import stddev_pop as stddev_pop +from sqlframe.base.functions import stddev_samp as stddev_samp +from sqlframe.base.functions import struct as struct +from sqlframe.base.functions import substr as substr +from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import sum as sum +from sqlframe.base.functions import sum_distinct as sum_distinct +from sqlframe.base.functions import sumDistinct as sumDistinct +from sqlframe.base.functions import tan as tan +from sqlframe.base.functions import tanh as tanh +from 
sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_date as to_date +from sqlframe.base.functions import to_number as to_number +from sqlframe.base.functions import to_timestamp as to_timestamp +from sqlframe.base.functions import toDegrees as toDegrees +from sqlframe.base.functions import toRadians as toRadians +from sqlframe.base.functions import translate as translate +from sqlframe.base.functions import trim as trim +from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof +from sqlframe.base.functions import ucase as ucase +from sqlframe.base.functions import unbase64 as unbase64 +from sqlframe.base.functions import unhex as unhex +from sqlframe.base.functions import unix_seconds as unix_seconds +from sqlframe.base.functions import unix_timestamp as unix_timestamp +from sqlframe.base.functions import upper as upper +from sqlframe.base.functions import user as user +from sqlframe.base.functions import var_pop as var_pop +from sqlframe.base.functions import var_samp as var_samp +from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import weekofyear as weekofyear +from sqlframe.base.functions import when as when +from sqlframe.base.functions import year as year diff --git a/sqlframe/snowflake/session.py b/sqlframe/snowflake/session.py index 0ef56a0..ddfd742 100644 --- a/sqlframe/snowflake/session.py +++ b/sqlframe/snowflake/session.py @@ -78,6 +78,10 @@ def __init__(self, conn: t.Optional[SnowflakeConnection] = None): else: self._conn._converter_class = JsonLoadsSnowflakeConverter # type: ignore + @property + def _is_snowflake(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_EXECUTION_DIALECT = "snowflake" diff --git a/sqlframe/spark/functions.py b/sqlframe/spark/functions.py index eee0e29..da31d04 100644 --- 
a/sqlframe/spark/functions.py +++ b/sqlframe/spark/functions.py @@ -11,12 +11,3 @@ if hasattr(func, "unsupported_engines") and "spark" not in func.unsupported_engines } ) - - -from sqlframe.base.function_alternatives import ( # noqa - percentile_without_disc as percentile, - add_months_by_multiplication as add_months, - arrays_overlap_renamed as arrays_overlap, - _is_string_using_typeof_string_lcase as _is_string, - try_element_at_zero_based as try_element_at, -) diff --git a/sqlframe/spark/functions.pyi b/sqlframe/spark/functions.pyi index 42672f7..3571c92 100644 --- a/sqlframe/spark/functions.pyi +++ b/sqlframe/spark/functions.pyi @@ -1,416 +1,414 @@ -from sqlframe.base.function_alternatives import ( # noqa - percentile_without_disc as percentile, - add_months_by_multiplication as add_months, - arrays_overlap_renamed as arrays_overlap, - try_element_at_zero_based as try_element_at, -) +from sqlframe.base.functions import abs as abs +from sqlframe.base.functions import acos as acos +from sqlframe.base.functions import acosh as acosh +from sqlframe.base.functions import add_months as add_months +from sqlframe.base.functions import aes_decrypt as aes_decrypt +from sqlframe.base.functions import aes_encrypt as aes_encrypt +from sqlframe.base.functions import aggregate as aggregate +from sqlframe.base.functions import any_value as any_value +from sqlframe.base.functions import approx_count_distinct as approx_count_distinct +from sqlframe.base.functions import approx_percentile as approx_percentile +from sqlframe.base.functions import approxCountDistinct as approxCountDistinct +from sqlframe.base.functions import array as array +from sqlframe.base.functions import array_agg as array_agg +from sqlframe.base.functions import array_append as array_append +from sqlframe.base.functions import array_compact as array_compact +from sqlframe.base.functions import array_contains as array_contains +from sqlframe.base.functions import array_distinct as array_distinct +from 
sqlframe.base.functions import array_except as array_except +from sqlframe.base.functions import array_insert as array_insert +from sqlframe.base.functions import array_intersect as array_intersect +from sqlframe.base.functions import array_join as array_join +from sqlframe.base.functions import array_max as array_max +from sqlframe.base.functions import array_min as array_min +from sqlframe.base.functions import array_position as array_position +from sqlframe.base.functions import array_prepend as array_prepend +from sqlframe.base.functions import array_remove as array_remove +from sqlframe.base.functions import array_repeat as array_repeat +from sqlframe.base.functions import array_size as array_size +from sqlframe.base.functions import array_sort as array_sort +from sqlframe.base.functions import array_union as array_union +from sqlframe.base.functions import arrays_overlap as arrays_overlap +from sqlframe.base.functions import arrays_zip as arrays_zip +from sqlframe.base.functions import asc as asc +from sqlframe.base.functions import asc_nulls_first as asc_nulls_first +from sqlframe.base.functions import asc_nulls_last as asc_nulls_last +from sqlframe.base.functions import ascii as ascii +from sqlframe.base.functions import asin as asin +from sqlframe.base.functions import asinh as asinh +from sqlframe.base.functions import assert_true as assert_true +from sqlframe.base.functions import atan as atan +from sqlframe.base.functions import atan2 as atan2 +from sqlframe.base.functions import atanh as atanh +from sqlframe.base.functions import avg as avg +from sqlframe.base.functions import base64 as base64 +from sqlframe.base.functions import bin as bin +from sqlframe.base.functions import bit_and as bit_and +from sqlframe.base.functions import bit_count as bit_count +from sqlframe.base.functions import bit_get as bit_get +from sqlframe.base.functions import bit_length as bit_length +from sqlframe.base.functions import bit_or as bit_or +from sqlframe.base.functions 
import bit_xor as bit_xor +from sqlframe.base.functions import bitmap_bit_position as bitmap_bit_position +from sqlframe.base.functions import bitmap_bucket_number as bitmap_bucket_number +from sqlframe.base.functions import bitmap_construct_agg as bitmap_construct_agg +from sqlframe.base.functions import bitmap_count as bitmap_count +from sqlframe.base.functions import bitmap_or_agg as bitmap_or_agg +from sqlframe.base.functions import bitwise_not as bitwise_not +from sqlframe.base.functions import bitwiseNOT as bitwiseNOT +from sqlframe.base.functions import bool_and as bool_and +from sqlframe.base.functions import bool_or as bool_or +from sqlframe.base.functions import broadcast as broadcast +from sqlframe.base.functions import bround as bround +from sqlframe.base.functions import btrim as btrim +from sqlframe.base.functions import bucket as bucket +from sqlframe.base.functions import call_function as call_function +from sqlframe.base.functions import cardinality as cardinality +from sqlframe.base.functions import cbrt as cbrt +from sqlframe.base.functions import ceil as ceil +from sqlframe.base.functions import ceiling as ceiling +from sqlframe.base.functions import char as char +from sqlframe.base.functions import char_length as char_length +from sqlframe.base.functions import character_length as character_length +from sqlframe.base.functions import coalesce as coalesce +from sqlframe.base.functions import col as col +from sqlframe.base.functions import collect_list as collect_list +from sqlframe.base.functions import collect_set as collect_set +from sqlframe.base.functions import concat as concat +from sqlframe.base.functions import concat_ws as concat_ws +from sqlframe.base.functions import contains as contains +from sqlframe.base.functions import conv as conv +from sqlframe.base.functions import convert_timezone as convert_timezone +from sqlframe.base.functions import corr as corr +from sqlframe.base.functions import cos as cos +from sqlframe.base.functions 
import cosh as cosh +from sqlframe.base.functions import cot as cot +from sqlframe.base.functions import count as count +from sqlframe.base.functions import count_distinct as count_distinct +from sqlframe.base.functions import count_if as count_if +from sqlframe.base.functions import count_min_sketch as count_min_sketch +from sqlframe.base.functions import countDistinct as countDistinct +from sqlframe.base.functions import covar_pop as covar_pop +from sqlframe.base.functions import covar_samp as covar_samp +from sqlframe.base.functions import crc32 as crc32 +from sqlframe.base.functions import create_map as create_map +from sqlframe.base.functions import csc as csc +from sqlframe.base.functions import cume_dist as cume_dist +from sqlframe.base.functions import curdate as curdate +from sqlframe.base.functions import current_catalog as current_catalog +from sqlframe.base.functions import current_database as current_database +from sqlframe.base.functions import current_date as current_date +from sqlframe.base.functions import current_schema as current_schema +from sqlframe.base.functions import current_timestamp as current_timestamp +from sqlframe.base.functions import current_timezone as current_timezone +from sqlframe.base.functions import current_user as current_user +from sqlframe.base.functions import date_add as date_add +from sqlframe.base.functions import date_diff as date_diff +from sqlframe.base.functions import date_format as date_format +from sqlframe.base.functions import date_from_unix_date as date_from_unix_date +from sqlframe.base.functions import date_part as date_part +from sqlframe.base.functions import date_sub as date_sub +from sqlframe.base.functions import date_trunc as date_trunc +from sqlframe.base.functions import dateadd as dateadd +from sqlframe.base.functions import datediff as datediff +from sqlframe.base.functions import datepart as datepart +from sqlframe.base.functions import day as day +from sqlframe.base.functions import dayofmonth 
as dayofmonth +from sqlframe.base.functions import dayofweek as dayofweek +from sqlframe.base.functions import dayofyear as dayofyear +from sqlframe.base.functions import days as days +from sqlframe.base.functions import decode as decode +from sqlframe.base.functions import degrees as degrees +from sqlframe.base.functions import dense_rank as dense_rank +from sqlframe.base.functions import desc as desc +from sqlframe.base.functions import desc_nulls_first as desc_nulls_first +from sqlframe.base.functions import desc_nulls_last as desc_nulls_last +from sqlframe.base.functions import e as e +from sqlframe.base.functions import element_at as element_at +from sqlframe.base.functions import elt as elt +from sqlframe.base.functions import encode as encode +from sqlframe.base.functions import endswith as endswith +from sqlframe.base.functions import equal_null as equal_null +from sqlframe.base.functions import every as every +from sqlframe.base.functions import exists as exists +from sqlframe.base.functions import exp as exp +from sqlframe.base.functions import explode as explode +from sqlframe.base.functions import explode_outer as explode_outer +from sqlframe.base.functions import expm1 as expm1 +from sqlframe.base.functions import expr as expr +from sqlframe.base.functions import extract as extract +from sqlframe.base.functions import factorial as factorial +from sqlframe.base.functions import filter as filter +from sqlframe.base.functions import find_in_set as find_in_set +from sqlframe.base.functions import first as first +from sqlframe.base.functions import first_value as first_value +from sqlframe.base.functions import flatten as flatten +from sqlframe.base.functions import floor as floor +from sqlframe.base.functions import forall as forall +from sqlframe.base.functions import format_number as format_number +from sqlframe.base.functions import format_string as format_string +from sqlframe.base.functions import from_csv as from_csv +from sqlframe.base.functions 
import from_json as from_json +from sqlframe.base.functions import from_unixtime as from_unixtime +from sqlframe.base.functions import from_utc_timestamp as from_utc_timestamp +from sqlframe.base.functions import get as get +from sqlframe.base.functions import get_active_spark_context as get_active_spark_context +from sqlframe.base.functions import get_json_object as get_json_object +from sqlframe.base.functions import getbit as getbit +from sqlframe.base.functions import greatest as greatest +from sqlframe.base.functions import grouping as grouping +from sqlframe.base.functions import grouping_id as grouping_id +from sqlframe.base.functions import hash as hash +from sqlframe.base.functions import hex as hex +from sqlframe.base.functions import histogram_numeric as histogram_numeric +from sqlframe.base.functions import hll_sketch_agg as hll_sketch_agg +from sqlframe.base.functions import hll_sketch_estimate as hll_sketch_estimate +from sqlframe.base.functions import hll_union as hll_union +from sqlframe.base.functions import hll_union_agg as hll_union_agg +from sqlframe.base.functions import hour as hour +from sqlframe.base.functions import hours as hours +from sqlframe.base.functions import hypot as hypot +from sqlframe.base.functions import ifnull as ifnull +from sqlframe.base.functions import ilike as ilike +from sqlframe.base.functions import initcap as initcap +from sqlframe.base.functions import inline as inline +from sqlframe.base.functions import inline_outer as inline_outer +from sqlframe.base.functions import input_file_name as input_file_name +from sqlframe.base.functions import instr as instr +from sqlframe.base.functions import isnan as isnan +from sqlframe.base.functions import isnotnull as isnotnull +from sqlframe.base.functions import isnull as isnull +from sqlframe.base.functions import java_method as java_method +from sqlframe.base.functions import json_array_length as json_array_length +from sqlframe.base.functions import json_object_keys as 
json_object_keys +from sqlframe.base.functions import json_tuple as json_tuple +from sqlframe.base.functions import kurtosis as kurtosis +from sqlframe.base.functions import lag as lag +from sqlframe.base.functions import last as last +from sqlframe.base.functions import last_day as last_day +from sqlframe.base.functions import last_value as last_value +from sqlframe.base.functions import lcase as lcase +from sqlframe.base.functions import lead as lead +from sqlframe.base.functions import least as least +from sqlframe.base.functions import left as left +from sqlframe.base.functions import length as length +from sqlframe.base.functions import levenshtein as levenshtein +from sqlframe.base.functions import like as like +from sqlframe.base.functions import lit as lit +from sqlframe.base.functions import ln as ln +from sqlframe.base.functions import localtimestamp as localtimestamp +from sqlframe.base.functions import locate as locate +from sqlframe.base.functions import log as log +from sqlframe.base.functions import log1p as log1p +from sqlframe.base.functions import log2 as log2 +from sqlframe.base.functions import log10 as log10 +from sqlframe.base.functions import lower as lower +from sqlframe.base.functions import lpad as lpad +from sqlframe.base.functions import ltrim as ltrim +from sqlframe.base.functions import make_date as make_date +from sqlframe.base.functions import make_dt_interval as make_dt_interval +from sqlframe.base.functions import make_interval as make_interval +from sqlframe.base.functions import make_timestamp as make_timestamp +from sqlframe.base.functions import make_timestamp_ltz as make_timestamp_ltz +from sqlframe.base.functions import make_timestamp_ntz as make_timestamp_ntz +from sqlframe.base.functions import make_ym_interval as make_ym_interval +from sqlframe.base.functions import map_concat as map_concat +from sqlframe.base.functions import map_contains_key as map_contains_key +from sqlframe.base.functions import map_entries as 
map_entries +from sqlframe.base.functions import map_filter as map_filter +from sqlframe.base.functions import map_from_arrays as map_from_arrays +from sqlframe.base.functions import map_from_entries as map_from_entries +from sqlframe.base.functions import map_keys as map_keys +from sqlframe.base.functions import map_values as map_values +from sqlframe.base.functions import map_zip_with as map_zip_with +from sqlframe.base.functions import mask as mask +from sqlframe.base.functions import max as max +from sqlframe.base.functions import max_by as max_by +from sqlframe.base.functions import md5 as md5 +from sqlframe.base.functions import mean as mean +from sqlframe.base.functions import median as median +from sqlframe.base.functions import min as min +from sqlframe.base.functions import min_by as min_by +from sqlframe.base.functions import minute as minute +from sqlframe.base.functions import mode as mode from sqlframe.base.functions import ( - abs as abs, - acos as acos, - acosh as acosh, - aes_decrypt as aes_decrypt, - aes_encrypt as aes_encrypt, - aggregate as aggregate, - any_value as any_value, - approxCountDistinct as approxCountDistinct, - approx_count_distinct as approx_count_distinct, - approx_percentile as approx_percentile, - array as array, - array_agg as array_agg, - array_append as array_append, - array_compact as array_compact, - array_contains as array_contains, - array_distinct as array_distinct, - array_except as array_except, - array_insert as array_insert, - array_intersect as array_intersect, - array_join as array_join, - array_max as array_max, - array_min as array_min, - array_position as array_position, - array_prepend as array_prepend, - array_remove as array_remove, - array_repeat as array_repeat, - array_size as array_size, - array_sort as array_sort, - array_union as array_union, - arrays_zip as arrays_zip, - asc as asc, - asc_nulls_first as asc_nulls_first, - asc_nulls_last as asc_nulls_last, - ascii as ascii, - asin as asin, - asinh as 
asinh, - assert_true as assert_true, - atan as atan, - atan2 as atan2, - atanh as atanh, - avg as avg, - base64 as base64, - bin as bin, - bit_and as bit_and, - bit_count as bit_count, - bit_get as bit_get, - bit_length as bit_length, - bit_or as bit_or, - bit_xor as bit_xor, - bitmap_bit_position as bitmap_bit_position, - bitmap_bucket_number as bitmap_bucket_number, - bitmap_construct_agg as bitmap_construct_agg, - bitmap_count as bitmap_count, - bitmap_or_agg as bitmap_or_agg, - bitwiseNOT as bitwiseNOT, - bitwise_not as bitwise_not, - bool_and as bool_and, - bool_or as bool_or, - broadcast as broadcast, - bround as bround, - btrim as btrim, - bucket as bucket, - call_function as call_function, - cardinality as cardinality, - cbrt as cbrt, - ceil as ceil, - ceiling as ceiling, - char as char, - char_length as char_length, - character_length as character_length, - coalesce as coalesce, - col as col, - collect_list as collect_list, - collect_set as collect_set, - concat as concat, - concat_ws as concat_ws, - contains as contains, - conv as conv, - convert_timezone as convert_timezone, - corr as corr, - cos as cos, - cosh as cosh, - cot as cot, - count as count, - countDistinct as countDistinct, - count_distinct as count_distinct, - count_if as count_if, - count_min_sketch as count_min_sketch, - covar_pop as covar_pop, - covar_samp as covar_samp, - crc32 as crc32, - create_map as create_map, - csc as csc, - cume_dist as cume_dist, - curdate as curdate, - current_catalog as current_catalog, - current_database as current_database, - current_date as current_date, - current_schema as current_schema, - current_timestamp as current_timestamp, - current_timezone as current_timezone, - current_user as current_user, - date_add as date_add, - date_diff as date_diff, - date_format as date_format, - date_from_unix_date as date_from_unix_date, - date_part as date_part, - date_sub as date_sub, - date_trunc as date_trunc, - dateadd as dateadd, - datediff as datediff, - datepart 
as datepart, - day as day, - dayofmonth as dayofmonth, - dayofweek as dayofweek, - dayofyear as dayofyear, - days as days, - decode as decode, - degrees as degrees, - dense_rank as dense_rank, - desc as desc, - desc_nulls_first as desc_nulls_first, - desc_nulls_last as desc_nulls_last, - e as e, - element_at as element_at, - elt as elt, - encode as encode, - endswith as endswith, - equal_null as equal_null, - every as every, - exists as exists, - exp as exp, - explode as explode, - explode_outer as explode_outer, - expm1 as expm1, - expr as expr, - extract as extract, - factorial as factorial, - filter as filter, - find_in_set as find_in_set, - first as first, - first_value as first_value, - flatten as flatten, - floor as floor, - forall as forall, - format_number as format_number, - format_string as format_string, - from_csv as from_csv, - from_json as from_json, - from_unixtime as from_unixtime, - from_utc_timestamp as from_utc_timestamp, - get as get, - get_active_spark_context as get_active_spark_context, - get_json_object as get_json_object, - getbit as getbit, - greatest as greatest, - grouping as grouping, - grouping_id as grouping_id, - hash as hash, - hex as hex, - histogram_numeric as histogram_numeric, - hll_sketch_agg as hll_sketch_agg, - hll_sketch_estimate as hll_sketch_estimate, - hll_union as hll_union, - hll_union_agg as hll_union_agg, - hour as hour, - hours as hours, - hypot as hypot, - ifnull as ifnull, - ilike as ilike, - initcap as initcap, - inline as inline, - inline_outer as inline_outer, - input_file_name as input_file_name, - instr as instr, - isnan as isnan, - isnotnull as isnotnull, - isnull as isnull, - java_method as java_method, - json_array_length as json_array_length, - json_object_keys as json_object_keys, - json_tuple as json_tuple, - kurtosis as kurtosis, - lag as lag, - last as last, - last_day as last_day, - last_value as last_value, - lcase as lcase, - lead as lead, - least as least, - left as left, - length as length, - 
levenshtein as levenshtein, - like as like, - lit as lit, - ln as ln, - localtimestamp as localtimestamp, - locate as locate, - log as log, - log10 as log10, - log1p as log1p, - log2 as log2, - lower as lower, - lpad as lpad, - ltrim as ltrim, - make_date as make_date, - make_dt_interval as make_dt_interval, - make_interval as make_interval, - make_timestamp as make_timestamp, - make_timestamp_ltz as make_timestamp_ltz, - make_timestamp_ntz as make_timestamp_ntz, - make_ym_interval as make_ym_interval, - map_concat as map_concat, - map_contains_key as map_contains_key, - map_entries as map_entries, - map_filter as map_filter, - map_from_arrays as map_from_arrays, - map_from_entries as map_from_entries, - map_keys as map_keys, - map_values as map_values, - map_zip_with as map_zip_with, - mask as mask, - max as max, - max_by as max_by, - md5 as md5, - mean as mean, - median as median, - min as min, - min_by as min_by, - minute as minute, - mode as mode, monotonically_increasing_id as monotonically_increasing_id, - month as month, - months as months, - months_between as months_between, - named_struct as named_struct, - nanvl as nanvl, - negate as negate, - negative as negative, - next_day as next_day, - now as now, - nth_value as nth_value, - ntile as ntile, - nullif as nullif, - nvl as nvl, - nvl2 as nvl2, - octet_length as octet_length, - overlay as overlay, - parse_url as parse_url, - percent_rank as percent_rank, - percentile_approx as percentile_approx, - pi as pi, - pmod as pmod, - posexplode as posexplode, - posexplode_outer as posexplode_outer, - position as position, - positive as positive, - pow as pow, - power as power, - printf as printf, - quarter as quarter, - radians as radians, - raise_error as raise_error, - rand as rand, - randn as randn, - rank as rank, - reduce as reduce, - reflect as reflect, - regexp as regexp, - regexp_count as regexp_count, - regexp_extract as regexp_extract, - regexp_extract_all as regexp_extract_all, - regexp_instr as 
regexp_instr, - regexp_like as regexp_like, - regexp_replace as regexp_replace, - regexp_substr as regexp_substr, - regr_avgx as regr_avgx, - regr_avgy as regr_avgy, - regr_count as regr_count, - regr_intercept as regr_intercept, - regr_r2 as regr_r2, - regr_slope as regr_slope, - regr_sxx as regr_sxx, - regr_sxy as regr_sxy, - regr_syy as regr_syy, - repeat as repeat, - replace as replace, - reverse as reverse, - right as right, - rint as rint, - rlike as rlike, - round as round, - row_number as row_number, - rpad as rpad, - rtrim as rtrim, - schema_of_csv as schema_of_csv, - schema_of_json as schema_of_json, - sec as sec, - second as second, - sentences as sentences, - sequence as sequence, - sha as sha, - sha1 as sha1, - sha2 as sha2, - shiftLeft as shiftLeft, - shiftRight as shiftRight, - shiftRightUnsigned as shiftRightUnsigned, - shiftleft as shiftleft, - shiftright as shiftright, - shiftrightunsigned as shiftrightunsigned, - shuffle as shuffle, - sign as sign, - signum as signum, - sin as sin, - sinh as sinh, - size as size, - skewness as skewness, - slice as slice, - some as some, - sort_array as sort_array, - soundex as soundex, - spark_partition_id as spark_partition_id, - split as split, - split_part as split_part, - sqrt as sqrt, - stack as stack, - startswith as startswith, - std as std, - stddev as stddev, - stddev_pop as stddev_pop, - stddev_samp as stddev_samp, - str_to_map as str_to_map, - struct as struct, - substr as substr, - substring as substring, - substring_index as substring_index, - sum as sum, - sumDistinct as sumDistinct, - sum_distinct as sum_distinct, - tan as tan, - tanh as tanh, - timestamp_micros as timestamp_micros, - timestamp_millis as timestamp_millis, - timestamp_seconds as timestamp_seconds, - toDegrees as toDegrees, - toRadians as toRadians, - to_binary as to_binary, - to_char as to_char, - to_csv as to_csv, - to_date as to_date, - to_json as to_json, - to_number as to_number, - to_timestamp as to_timestamp, - 
to_timestamp_ltz as to_timestamp_ltz, - to_timestamp_ntz as to_timestamp_ntz, - to_unix_timestamp as to_unix_timestamp, - to_utc_timestamp as to_utc_timestamp, - to_varchar as to_varchar, - transform as transform, - transform_keys as transform_keys, - transform_values as transform_values, - translate as translate, - trim as trim, - trunc as trunc, - try_add as try_add, - try_aes_decrypt as try_aes_decrypt, - try_avg as try_avg, - try_divide as try_divide, - try_multiply as try_multiply, - try_subtract as try_subtract, - try_sum as try_sum, - try_to_binary as try_to_binary, - try_to_number as try_to_number, - try_to_timestamp as try_to_timestamp, - typeof as typeof, - ucase as ucase, - unbase64 as unbase64, - unhex as unhex, - unix_date as unix_date, - unix_micros as unix_micros, - unix_millis as unix_millis, - unix_seconds as unix_seconds, - unix_timestamp as unix_timestamp, - upper as upper, - url_decode as url_decode, - url_encode as url_encode, - user as user, - var_pop as var_pop, - var_samp as var_samp, - variance as variance, - version as version, - weekday as weekday, - weekofyear as weekofyear, - when as when, - width_bucket as width_bucket, - xpath as xpath, - xpath_boolean as xpath_boolean, - xpath_double as xpath_double, - xpath_float as xpath_float, - xpath_int as xpath_int, - xpath_long as xpath_long, - xpath_number as xpath_number, - xpath_short as xpath_short, - xpath_string as xpath_string, - xxhash64 as xxhash64, - year as year, - years as years, - zip_with as zip_with, ) +from sqlframe.base.functions import month as month +from sqlframe.base.functions import months as months +from sqlframe.base.functions import months_between as months_between +from sqlframe.base.functions import named_struct as named_struct +from sqlframe.base.functions import nanvl as nanvl +from sqlframe.base.functions import negate as negate +from sqlframe.base.functions import negative as negative +from sqlframe.base.functions import next_day as next_day +from 
sqlframe.base.functions import now as now +from sqlframe.base.functions import nth_value as nth_value +from sqlframe.base.functions import ntile as ntile +from sqlframe.base.functions import nullif as nullif +from sqlframe.base.functions import nvl as nvl +from sqlframe.base.functions import nvl2 as nvl2 +from sqlframe.base.functions import octet_length as octet_length +from sqlframe.base.functions import overlay as overlay +from sqlframe.base.functions import parse_url as parse_url +from sqlframe.base.functions import percent_rank as percent_rank +from sqlframe.base.functions import percentile as percentile +from sqlframe.base.functions import percentile_approx as percentile_approx +from sqlframe.base.functions import pi as pi +from sqlframe.base.functions import pmod as pmod +from sqlframe.base.functions import posexplode as posexplode +from sqlframe.base.functions import posexplode_outer as posexplode_outer +from sqlframe.base.functions import position as position +from sqlframe.base.functions import positive as positive +from sqlframe.base.functions import pow as pow +from sqlframe.base.functions import power as power +from sqlframe.base.functions import printf as printf +from sqlframe.base.functions import quarter as quarter +from sqlframe.base.functions import radians as radians +from sqlframe.base.functions import raise_error as raise_error +from sqlframe.base.functions import rand as rand +from sqlframe.base.functions import randn as randn +from sqlframe.base.functions import rank as rank +from sqlframe.base.functions import reduce as reduce +from sqlframe.base.functions import reflect as reflect +from sqlframe.base.functions import regexp as regexp +from sqlframe.base.functions import regexp_count as regexp_count +from sqlframe.base.functions import regexp_extract as regexp_extract +from sqlframe.base.functions import regexp_extract_all as regexp_extract_all +from sqlframe.base.functions import regexp_instr as regexp_instr +from sqlframe.base.functions 
import regexp_like as regexp_like +from sqlframe.base.functions import regexp_replace as regexp_replace +from sqlframe.base.functions import regexp_substr as regexp_substr +from sqlframe.base.functions import regr_avgx as regr_avgx +from sqlframe.base.functions import regr_avgy as regr_avgy +from sqlframe.base.functions import regr_count as regr_count +from sqlframe.base.functions import regr_intercept as regr_intercept +from sqlframe.base.functions import regr_r2 as regr_r2 +from sqlframe.base.functions import regr_slope as regr_slope +from sqlframe.base.functions import regr_sxx as regr_sxx +from sqlframe.base.functions import regr_sxy as regr_sxy +from sqlframe.base.functions import regr_syy as regr_syy +from sqlframe.base.functions import repeat as repeat +from sqlframe.base.functions import replace as replace +from sqlframe.base.functions import reverse as reverse +from sqlframe.base.functions import right as right +from sqlframe.base.functions import rint as rint +from sqlframe.base.functions import rlike as rlike +from sqlframe.base.functions import round as round +from sqlframe.base.functions import row_number as row_number +from sqlframe.base.functions import rpad as rpad +from sqlframe.base.functions import rtrim as rtrim +from sqlframe.base.functions import schema_of_csv as schema_of_csv +from sqlframe.base.functions import schema_of_json as schema_of_json +from sqlframe.base.functions import sec as sec +from sqlframe.base.functions import second as second +from sqlframe.base.functions import sentences as sentences +from sqlframe.base.functions import sequence as sequence +from sqlframe.base.functions import sha as sha +from sqlframe.base.functions import sha1 as sha1 +from sqlframe.base.functions import sha2 as sha2 +from sqlframe.base.functions import shiftLeft as shiftLeft +from sqlframe.base.functions import shiftleft as shiftleft +from sqlframe.base.functions import shiftRight as shiftRight +from sqlframe.base.functions import shiftright as 
shiftright +from sqlframe.base.functions import shiftRightUnsigned as shiftRightUnsigned +from sqlframe.base.functions import shiftrightunsigned as shiftrightunsigned +from sqlframe.base.functions import shuffle as shuffle +from sqlframe.base.functions import sign as sign +from sqlframe.base.functions import signum as signum +from sqlframe.base.functions import sin as sin +from sqlframe.base.functions import sinh as sinh +from sqlframe.base.functions import size as size +from sqlframe.base.functions import skewness as skewness +from sqlframe.base.functions import slice as slice +from sqlframe.base.functions import some as some +from sqlframe.base.functions import sort_array as sort_array +from sqlframe.base.functions import soundex as soundex +from sqlframe.base.functions import spark_partition_id as spark_partition_id +from sqlframe.base.functions import split as split +from sqlframe.base.functions import split_part as split_part +from sqlframe.base.functions import sqrt as sqrt +from sqlframe.base.functions import stack as stack +from sqlframe.base.functions import startswith as startswith +from sqlframe.base.functions import std as std +from sqlframe.base.functions import stddev as stddev +from sqlframe.base.functions import stddev_pop as stddev_pop +from sqlframe.base.functions import stddev_samp as stddev_samp +from sqlframe.base.functions import str_to_map as str_to_map +from sqlframe.base.functions import struct as struct +from sqlframe.base.functions import substr as substr +from sqlframe.base.functions import substring as substring +from sqlframe.base.functions import substring_index as substring_index +from sqlframe.base.functions import sum as sum +from sqlframe.base.functions import sum_distinct as sum_distinct +from sqlframe.base.functions import sumDistinct as sumDistinct +from sqlframe.base.functions import tan as tan +from sqlframe.base.functions import tanh as tanh +from sqlframe.base.functions import timestamp_micros as timestamp_micros +from 
sqlframe.base.functions import timestamp_millis as timestamp_millis +from sqlframe.base.functions import timestamp_seconds as timestamp_seconds +from sqlframe.base.functions import to_binary as to_binary +from sqlframe.base.functions import to_char as to_char +from sqlframe.base.functions import to_csv as to_csv +from sqlframe.base.functions import to_date as to_date +from sqlframe.base.functions import to_json as to_json +from sqlframe.base.functions import to_number as to_number +from sqlframe.base.functions import to_timestamp as to_timestamp +from sqlframe.base.functions import to_timestamp_ltz as to_timestamp_ltz +from sqlframe.base.functions import to_timestamp_ntz as to_timestamp_ntz +from sqlframe.base.functions import to_unix_timestamp as to_unix_timestamp +from sqlframe.base.functions import to_utc_timestamp as to_utc_timestamp +from sqlframe.base.functions import to_varchar as to_varchar +from sqlframe.base.functions import toDegrees as toDegrees +from sqlframe.base.functions import toRadians as toRadians +from sqlframe.base.functions import transform as transform +from sqlframe.base.functions import transform_keys as transform_keys +from sqlframe.base.functions import transform_values as transform_values +from sqlframe.base.functions import translate as translate +from sqlframe.base.functions import trim as trim +from sqlframe.base.functions import trunc as trunc +from sqlframe.base.functions import try_add as try_add +from sqlframe.base.functions import try_aes_decrypt as try_aes_decrypt +from sqlframe.base.functions import try_avg as try_avg +from sqlframe.base.functions import try_divide as try_divide +from sqlframe.base.functions import try_element_at as try_element_at +from sqlframe.base.functions import try_multiply as try_multiply +from sqlframe.base.functions import try_subtract as try_subtract +from sqlframe.base.functions import try_sum as try_sum +from sqlframe.base.functions import try_to_binary as try_to_binary +from sqlframe.base.functions 
import try_to_number as try_to_number +from sqlframe.base.functions import try_to_timestamp as try_to_timestamp +from sqlframe.base.functions import typeof as typeof +from sqlframe.base.functions import ucase as ucase +from sqlframe.base.functions import unbase64 as unbase64 +from sqlframe.base.functions import unhex as unhex +from sqlframe.base.functions import unix_date as unix_date +from sqlframe.base.functions import unix_micros as unix_micros +from sqlframe.base.functions import unix_millis as unix_millis +from sqlframe.base.functions import unix_seconds as unix_seconds +from sqlframe.base.functions import unix_timestamp as unix_timestamp +from sqlframe.base.functions import upper as upper +from sqlframe.base.functions import url_decode as url_decode +from sqlframe.base.functions import url_encode as url_encode +from sqlframe.base.functions import user as user +from sqlframe.base.functions import var_pop as var_pop +from sqlframe.base.functions import var_samp as var_samp +from sqlframe.base.functions import variance as variance +from sqlframe.base.functions import version as version +from sqlframe.base.functions import weekday as weekday +from sqlframe.base.functions import weekofyear as weekofyear +from sqlframe.base.functions import when as when +from sqlframe.base.functions import width_bucket as width_bucket +from sqlframe.base.functions import xpath as xpath +from sqlframe.base.functions import xpath_boolean as xpath_boolean +from sqlframe.base.functions import xpath_double as xpath_double +from sqlframe.base.functions import xpath_float as xpath_float +from sqlframe.base.functions import xpath_int as xpath_int +from sqlframe.base.functions import xpath_long as xpath_long +from sqlframe.base.functions import xpath_number as xpath_number +from sqlframe.base.functions import xpath_short as xpath_short +from sqlframe.base.functions import xpath_string as xpath_string +from sqlframe.base.functions import xxhash64 as xxhash64 +from sqlframe.base.functions 
import year as year +from sqlframe.base.functions import years as years +from sqlframe.base.functions import zip_with as zip_with diff --git a/sqlframe/spark/session.py b/sqlframe/spark/session.py index 2052543..bcf5dc0 100644 --- a/sqlframe/spark/session.py +++ b/sqlframe/spark/session.py @@ -120,6 +120,10 @@ def builder(cls) -> Builder: # type: ignore """ return cls.Builder() + @property + def _is_spark(self) -> bool: + return True + class Builder(_BaseSession.Builder): def __init__(self): from pyspark.sql.session import SparkSession diff --git a/sqlframe/standalone/functions.py b/sqlframe/standalone/functions.py index c5e94df..55a7aa0 100644 --- a/sqlframe/standalone/functions.py +++ b/sqlframe/standalone/functions.py @@ -1 +1 @@ -from sqlframe.base.functions import * +from sqlframe.base.functions import * # noqa diff --git a/sqlframe/standalone/session.py b/sqlframe/standalone/session.py index e1bed5c..9ef49b9 100644 --- a/sqlframe/standalone/session.py +++ b/sqlframe/standalone/session.py @@ -28,6 +28,10 @@ class StandaloneSession( _df = StandaloneDataFrame _udf_registration = StandaloneUDFRegistration + @property + def _is_standalone(self) -> bool: + return True + class Builder(_BaseSession.Builder): DEFAULT_INPUT_DIALECT = "spark" DEFAULT_OUTPUT_DIALECT = "spark" diff --git a/tests/integration/engines/test_int_functions.py b/tests/integration/engines/test_int_functions.py index 5ad8721..6e52364 100644 --- a/tests/integration/engines/test_int_functions.py +++ b/tests/integration/engines/test_int_functions.py @@ -2603,7 +2603,9 @@ def test_array_sort(get_session_and_func, get_func): Row(r=[1, 2, 3]), Row(r=[1]), ] - assert df.select(array_sort(df.data, asc=False).alias("r")).collect() == [ + # ASC/DESC is strange on BigQuery but it is from a legacy bug. + # Should be updated to not share the `sort_array` function + assert df.select(array_sort(df.data, comparator=False).alias("r")).collect() == [ Row(r=[3, 2, 1]), Row(r=[1]), ]