From e7194dff6a816bf3a721cbf579ceac19c11cd111 Mon Sep 17 00:00:00 2001 From: Dmitry Astankov Date: Wed, 13 Nov 2024 16:46:46 +0300 Subject: [PATCH] Add support for semicolon stripping to DbApiHook, PrestoHook, and TrinoHook (#41916) --------- Co-authored-by: Elad Kalif <45845474+eladkal@users.noreply.github.com> --- .../src/airflow_breeze/global_constants.py | 2 +- dev/breeze/tests/test_packages.py | 4 +- generated/provider_dependencies.json | 54 +++++++++---------- .../airflow/providers/amazon/provider.yaml | 2 +- .../providers/apache/drill/provider.yaml | 2 +- .../providers/apache/druid/provider.yaml | 2 +- .../providers/apache/hive/provider.yaml | 2 +- .../providers/apache/impala/provider.yaml | 2 +- .../providers/apache/pinot/provider.yaml | 2 +- .../airflow/providers/common/sql/hooks/sql.py | 15 ++++-- .../providers/common/sql/hooks/sql.pyi | 3 +- .../providers/common/sql/provider.yaml | 3 +- .../providers/databricks/provider.yaml | 2 +- .../providers/elasticsearch/provider.yaml | 2 +- .../airflow/providers/exasol/provider.yaml | 2 +- .../airflow/providers/google/provider.yaml | 2 +- .../src/airflow/providers/jdbc/provider.yaml | 2 +- .../providers/microsoft/mssql/provider.yaml | 2 +- .../src/airflow/providers/mysql/provider.yaml | 2 +- .../src/airflow/providers/odbc/provider.yaml | 2 +- .../providers/openlineage/provider.yaml | 3 +- .../providers/openlineage/sqlparser.py | 7 ++- .../airflow/providers/oracle/provider.yaml | 2 +- .../airflow/providers/postgres/provider.yaml | 2 +- .../airflow/providers/presto/hooks/presto.py | 1 + .../airflow/providers/presto/provider.yaml | 2 +- .../src/airflow/providers/slack/provider.yaml | 2 +- .../airflow/providers/snowflake/provider.yaml | 2 +- .../airflow/providers/sqlite/provider.yaml | 2 +- .../airflow/providers/standard/provider.yaml | 2 +- .../airflow/providers/teradata/provider.yaml | 2 +- .../airflow/providers/trino/hooks/trino.py | 1 + .../src/airflow/providers/trino/provider.yaml | 2 +- .../airflow/providers/vertica/provider.yaml | 2 +- .../src/airflow/providers/ydb/provider.yaml | 2 +- providers/tests/presto/hooks/test_presto.py | 10 ++++ providers/tests/trino/hooks/test_trino.py | 10 ++++ 37 files changed, 100 insertions(+), 63 deletions(-) diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index ce0b4f8f2fbf..15cd0ac81cc5 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -568,7 +568,7 @@ def get_airflow_extras(): # END OF EXTRAS LIST UPDATED BY PRE COMMIT ] -CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon"]) +CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon common.sql"]) BASE_PROVIDERS_COMPATIBILITY_CHECKS: list[dict[str, str | list[str]]] = [ diff --git a/dev/breeze/tests/test_packages.py b/dev/breeze/tests/test_packages.py index 390bb34b435f..fe3173b2e9dd 100644 --- a/dev/breeze/tests/test_packages.py +++ b/dev/breeze/tests/test_packages.py @@ -209,7 +209,7 @@ def test_get_documentation_package_path(): "postgres", "beta0", """ - "apache-airflow-providers-common-sql>=1.17.0b0", + "apache-airflow-providers-common-sql>=1.20.0b0", "apache-airflow>=2.8.0b0", "psycopg2-binary>=2.9.4", """, @@ -219,7 +219,7 @@ def test_get_documentation_package_path(): "postgres", "", """ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "psycopg2-binary>=2.9.4", """, diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 1c3de04d2c5d..0fd840c1a9f1 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -27,7 +27,7 @@ "deps": [ "PyAthena>=3.0.10", "apache-airflow-providers-common-compat>=1.2.1", - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow-providers-http", "apache-airflow>=2.8.0", "asgiref>=2.3.0", @@ -102,7 +102,7 @@ }, "apache.drill": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "sqlalchemy-drill>=1.1.0" ], @@ -116,7 +116,7 @@ }, "apache.druid": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pydruid>=0.4.1" ], @@ -159,7 +159,7 @@ }, "apache.hive": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "hmsclient>=0.1.0", "jmespath>=0.7.0", @@ -201,7 +201,7 @@ }, "apache.impala": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "impyla>=0.18.0,<1.0" ], @@ -265,7 +265,7 @@ }, "apache.pinot": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pinotdb>=5.1.0" ], @@ -421,7 +421,7 @@ "deps": [ "apache-airflow>=2.8.0", "more-itertools>=9.0.0", - "sqlparse>=0.4.2" + "sqlparse>=0.5.1" ], "devel-deps": [], "plugins": [], @@ -434,7 +434,7 @@ "databricks": { "deps": [ "aiohttp>=3.9.2, <4", - "apache-airflow-providers-common-sql>=1.10.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", "mergedeep>=1.3.4", @@ -545,7 +545,7 @@ }, "elasticsearch": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "elasticsearch>=8.10,<9" ], @@ -559,7 +559,7 @@ }, "exasol": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pandas>=1.5.3,<2.2;python_version<\"3.9\"", "pandas>=2.1.2,<2.2;python_version>=\"3.9\"", @@ -632,7 +632,7 @@ "deps": [ "PyOpenSSL>=23.0.0", "apache-airflow-providers-common-compat>=1.2.1", - "apache-airflow-providers-common-sql>=1.7.2", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "asgiref>=3.5.2", "dill>=0.2.3", @@ -787,7 +787,7 @@ }, "jdbc": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "jaydebeapi>=1.1.1" ], @@ -855,7 +855,7 @@ }, "microsoft.mssql": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "methodtools>=0.4.7", "pymssql>=2.3.0" @@ -906,7 +906,7 @@ }, "mysql": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "mysql-connector-python>=8.0.29", "mysqlclient>=1.4.0; sys_platform != 'darwin'" @@ -937,7 +937,7 @@ }, "odbc": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pyodbc>=5.0.0" ], @@ -973,7 +973,7 @@ "openlineage": { "deps": [ "apache-airflow-providers-common-compat>=1.2.1", - "apache-airflow-providers-common-sql>=1.6.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "attrs>=22.2", "openlineage-integration-common>=1.24.2", @@ -1017,7 +1017,7 @@ }, "oracle": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "oracledb>=2.0.0" ], @@ -1083,7 +1083,7 @@ }, "postgres": { "deps": [ - "apache-airflow-providers-common-sql>=1.17.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "psycopg2-binary>=2.9.4" ], @@ -1099,7 +1099,7 @@ }, "presto": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pandas>=1.5.3,<2.2;python_version<\"3.9\"", "pandas>=2.1.2,<2.2;python_version>=\"3.9\"", @@ -1214,7 +1214,7 @@ }, "slack": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "slack_sdk>=3.19.0" ], @@ -1239,7 +1239,7 @@ "snowflake": { "deps": [ "apache-airflow-providers-common-compat>=1.1.0", - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pandas>=1.5.3,<2.2;python_version<\"3.9\"", "pandas>=2.1.2,<2.2;python_version>=\"3.9\"", @@ -1260,7 +1260,7 @@ }, "sqlite": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0" ], "devel-deps": [], @@ -1285,7 +1285,7 @@ }, "standard": { "deps": [ - "apache-airflow-providers-common-sql>=1.18.0", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0" ], "devel-deps": [], @@ -1318,7 +1318,7 @@ }, "teradata": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "teradatasql>=17.20.0.28", "teradatasqlalchemy>=17.20.0.0" @@ -1335,7 +1335,7 @@ }, "trino": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "pandas>=1.5.3,<2.2;python_version<\"3.9\"", "pandas>=2.1.2,<2.2;python_version>=\"3.9\"", @@ -1353,7 +1353,7 @@ }, "vertica": { "deps": [ - "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "vertica-python>=0.6.0" ], @@ -1393,7 +1393,7 @@ }, "ydb": { "deps": [ - "apache-airflow-providers-common-sql>=1.14.1", + "apache-airflow-providers-common-sql>=1.20.0", "apache-airflow>=2.8.0", "ydb-dbapi>=0.1.0", "ydb>=3.18.8" diff --git a/providers/src/airflow/providers/amazon/provider.yaml b/providers/src/airflow/providers/amazon/provider.yaml index 2e49d8845b20..9cebb32d488e 100644 --- a/providers/src/airflow/providers/amazon/provider.yaml +++ b/providers/src/airflow/providers/amazon/provider.yaml @@ -95,7 +95,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - apache-airflow-providers-common-compat>=1.2.1 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - apache-airflow-providers-http # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. Make sure to configure boto3 version here as well as in all the tools below diff --git a/providers/src/airflow/providers/apache/drill/provider.yaml b/providers/src/airflow/providers/apache/drill/provider.yaml index 8512112bf092..f649512903ab 100644 --- a/providers/src/airflow/providers/apache/drill/provider.yaml +++ b/providers/src/airflow/providers/apache/drill/provider.yaml @@ -53,7 +53,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - sqlalchemy-drill>=1.1.0 integrations: diff --git a/providers/src/airflow/providers/apache/druid/provider.yaml b/providers/src/airflow/providers/apache/druid/provider.yaml index c67d658f8ac2..416602ea0344 100644 --- a/providers/src/airflow/providers/apache/druid/provider.yaml +++ b/providers/src/airflow/providers/apache/druid/provider.yaml @@ -60,7 +60,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - pydruid>=0.4.1 integrations: diff --git a/providers/src/airflow/providers/apache/hive/provider.yaml b/providers/src/airflow/providers/apache/hive/provider.yaml index 1ee8e62f4caf..1cfdf10fe563 100644 --- a/providers/src/airflow/providers/apache/hive/provider.yaml +++ b/providers/src/airflow/providers/apache/hive/provider.yaml @@ -73,7 +73,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - hmsclient>=0.1.0 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies diff --git a/providers/src/airflow/providers/apache/impala/provider.yaml b/providers/src/airflow/providers/apache/impala/provider.yaml index d7958369cff9..3dc44b77d83a 100644 --- a/providers/src/airflow/providers/apache/impala/provider.yaml +++ b/providers/src/airflow/providers/apache/impala/provider.yaml @@ -41,7 +41,7 @@ versions: dependencies: - impyla>=0.18.0,<1.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - apache-airflow>=2.8.0 additional-extras: diff --git a/providers/src/airflow/providers/apache/pinot/provider.yaml b/providers/src/airflow/providers/apache/pinot/provider.yaml index befaf091e276..ef0a9ae5d3c9 100644 --- a/providers/src/airflow/providers/apache/pinot/provider.yaml +++ b/providers/src/airflow/providers/apache/pinot/provider.yaml @@ -53,7 +53,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - pinotdb>=5.1.0 integrations: diff --git a/providers/src/airflow/providers/common/sql/hooks/sql.py b/providers/src/airflow/providers/common/sql/hooks/sql.py index 60c659e340fe..4904bc23f8da 100644 --- a/providers/src/airflow/providers/common/sql/hooks/sql.py +++ b/providers/src/airflow/providers/common/sql/hooks/sql.py @@ -150,6 +150,8 @@ class DbApiHook(BaseHook): conn_name_attr: str # Override to have a default connection id for a particular dbHook default_conn_name = "default_conn_id" + # Override if this db doesn't support semicolons in SQL queries + strip_semicolon = False # Override if this db supports autocommit. supports_autocommit = False # Override if this db supports executemany. @@ -369,14 +371,18 @@ def strip_sql_string(sql: str) -> str: return sql.strip().rstrip(";") @staticmethod - def split_sql_string(sql: str) -> list[str]: + def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]: """ Split string into multiple SQL expressions. :param sql: SQL string potentially consisting of multiple expressions + :param strip_semicolon: whether to strip semicolon from SQL string :return: list of individual expressions """ - splits = sqlparse.split(sqlparse.format(sql, strip_comments=True)) + splits = sqlparse.split( + sql=sqlparse.format(sql, strip_comments=True), + strip_semicolon=strip_semicolon, + ) return [s for s in splits if s] @property @@ -471,7 +477,10 @@ def run( if isinstance(sql, str): if split_statements: - sql_list: Iterable[str] = self.split_sql_string(sql) + sql_list: Iterable[str] = self.split_sql_string( + sql=sql, + strip_semicolon=self.strip_semicolon, + ) else: sql_list = [sql] if sql.strip() else [] else: diff --git a/providers/src/airflow/providers/common/sql/hooks/sql.pyi b/providers/src/airflow/providers/common/sql/hooks/sql.pyi index 0039733d966a..ed93958401ed 100644 --- a/providers/src/airflow/providers/common/sql/hooks/sql.pyi +++ b/providers/src/airflow/providers/common/sql/hooks/sql.pyi @@ -54,6 +54,7 @@ class ConnectorProtocol(Protocol): class DbApiHook(BaseHook): conn_name_attr: str default_conn_name: str + strip_semicolon: bool supports_autocommit: bool supports_executemany: bool connector: ConnectorProtocol | None @@ -93,7 +94,7 @@ class DbApiHook(BaseHook): @staticmethod def strip_sql_string(sql: str) -> str: ... @staticmethod - def split_sql_string(sql: str) -> list[str]: ... + def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]: ... @property def last_description(self) -> Sequence[Sequence] | None: ... @overload diff --git a/providers/src/airflow/providers/common/sql/provider.yaml b/providers/src/airflow/providers/common/sql/provider.yaml index f02516707277..42fd9d72dc83 100644 --- a/providers/src/airflow/providers/common/sql/provider.yaml +++ b/providers/src/airflow/providers/common/sql/provider.yaml @@ -25,6 +25,7 @@ state: ready source-date-epoch: 1730012422 # note that those versions are maintained by release manager - do not update them manually versions: + - 1.20.0 - 1.19.0 - 1.18.0 - 1.17.1 @@ -64,7 +65,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - sqlparse>=0.4.2 + - sqlparse>=0.5.1 - more-itertools>=9.0.0 additional-extras: diff --git a/providers/src/airflow/providers/databricks/provider.yaml b/providers/src/airflow/providers/databricks/provider.yaml index fe2bb9f7fde4..d7ec6c2f8925 100644 --- a/providers/src/airflow/providers/databricks/provider.yaml +++ b/providers/src/airflow/providers/databricks/provider.yaml @@ -72,7 +72,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.10.0 + - apache-airflow-providers-common-sql>=1.20.0 - requests>=2.27.0,<3 # The connector 2.9.0 released on Aug 10, 2023 has a bug that it does not properly declare urllib3 and # it needs to be excluded. See https://github.com/databricks/databricks-sql-python/issues/190 diff --git a/providers/src/airflow/providers/elasticsearch/provider.yaml b/providers/src/airflow/providers/elasticsearch/provider.yaml index 595a3d45be5c..a3fa966f2ac6 100644 --- a/providers/src/airflow/providers/elasticsearch/provider.yaml +++ b/providers/src/airflow/providers/elasticsearch/provider.yaml @@ -70,7 +70,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 - elasticsearch>=8.10,<9 integrations: diff --git a/providers/src/airflow/providers/exasol/provider.yaml b/providers/src/airflow/providers/exasol/provider.yaml index bca92be13770..cfc3a6fdd45f 100644 --- a/providers/src/airflow/providers/exasol/provider.yaml +++ b/providers/src/airflow/providers/exasol/provider.yaml @@ -61,7 +61,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - pyexasol>=0.5.1 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies diff --git a/providers/src/airflow/providers/google/provider.yaml b/providers/src/airflow/providers/google/provider.yaml index 1790008b3a33..0d5fb07f8e04 100644 --- a/providers/src/airflow/providers/google/provider.yaml +++ b/providers/src/airflow/providers/google/provider.yaml @@ -99,7 +99,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - apache-airflow-providers-common-compat>=1.2.1 - - apache-airflow-providers-common-sql>=1.7.2 + - apache-airflow-providers-common-sql>=1.20.0 - asgiref>=3.5.2 - dill>=0.2.3 - gcloud-aio-auth>=5.2.0 diff --git a/providers/src/airflow/providers/jdbc/provider.yaml b/providers/src/airflow/providers/jdbc/provider.yaml index 5165d3393961..364bc9f451dc 100644 --- a/providers/src/airflow/providers/jdbc/provider.yaml +++ b/providers/src/airflow/providers/jdbc/provider.yaml @@ -55,7 +55,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 - jaydebeapi>=1.1.1 integrations: diff --git a/providers/src/airflow/providers/microsoft/mssql/provider.yaml b/providers/src/airflow/providers/microsoft/mssql/provider.yaml index 8bee7393ccf2..8fbda1aefab9 100644 --- a/providers/src/airflow/providers/microsoft/mssql/provider.yaml +++ b/providers/src/airflow/providers/microsoft/mssql/provider.yaml @@ -56,7 +56,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 - pymssql>=2.3.0 # The methodtools dependency can be removed with min airflow version >=2.9.1 # as it was added in https://github.com/apache/airflow/pull/37757 diff --git a/providers/src/airflow/providers/mysql/provider.yaml b/providers/src/airflow/providers/mysql/provider.yaml index a1ab2229dfd4..19f53bb141f8 100644 --- a/providers/src/airflow/providers/mysql/provider.yaml +++ b/providers/src/airflow/providers/mysql/provider.yaml @@ -69,7 +69,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 # The mysqlclient package creates friction when installing on MacOS as it needs pkg-config to # Install and compile, and it's really only used by MySQL provider, so we can skip it on MacOS # Instead, if someone attempts to use it on MacOS, they will get explanatory error on how to install it diff --git a/providers/src/airflow/providers/odbc/provider.yaml b/providers/src/airflow/providers/odbc/provider.yaml index 9904e727c878..fb3c71603109 100644 --- a/providers/src/airflow/providers/odbc/provider.yaml +++ b/providers/src/airflow/providers/odbc/provider.yaml @@ -56,7 +56,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 - pyodbc>=5.0.0 integrations: diff --git a/providers/src/airflow/providers/openlineage/provider.yaml b/providers/src/airflow/providers/openlineage/provider.yaml index e638daba78ed..c9eaa5206992 100644 --- a/providers/src/airflow/providers/openlineage/provider.yaml +++ b/providers/src/airflow/providers/openlineage/provider.yaml @@ -25,6 +25,7 @@ state: ready source-date-epoch: 1730013356 # note that those versions are maintained by release manager - do not update them manually versions: + - 1.14.0 - 1.13.0 - 1.12.2 - 1.12.1 @@ -51,7 +52,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.6.0 + - apache-airflow-providers-common-sql>=1.20.0 - apache-airflow-providers-common-compat>=1.2.1 - attrs>=22.2 - openlineage-integration-common>=1.24.2 diff --git a/providers/src/airflow/providers/openlineage/sqlparser.py b/providers/src/airflow/providers/openlineage/sqlparser.py index 323ed8a11b8f..9751af3f7941 100644 --- a/providers/src/airflow/providers/openlineage/sqlparser.py +++ b/providers/src/airflow/providers/openlineage/sqlparser.py @@ -331,8 +331,11 @@ def split_sql_string(cls, sql: list[str] | str) -> list[str]: split_statement = DbApiHook.split_sql_string except (ImportError, AttributeError): # No common.sql Airflow provider available or version is too old. - def split_statement(sql: str) -> list[str]: - splits = sqlparse.split(sqlparse.format(sql, strip_comments=True)) + def split_statement(sql: str, strip_semicolon: bool = False) -> list[str]: + splits = sqlparse.split( + sql=sqlparse.format(sql, strip_comments=True), + strip_semicolon=strip_semicolon, + ) return [s for s in splits if s] if isinstance(sql, str): diff --git a/providers/src/airflow/providers/oracle/provider.yaml b/providers/src/airflow/providers/oracle/provider.yaml index 01309d91146b..c50924d55e65 100644 --- a/providers/src/airflow/providers/oracle/provider.yaml +++ b/providers/src/airflow/providers/oracle/provider.yaml @@ -61,7 +61,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - oracledb>=2.0.0 additional-extras: diff --git a/providers/src/airflow/providers/postgres/provider.yaml b/providers/src/airflow/providers/postgres/provider.yaml index 51332b9b0fea..7ad546043967 100644 --- a/providers/src/airflow/providers/postgres/provider.yaml +++ b/providers/src/airflow/providers/postgres/provider.yaml @@ -67,7 +67,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.17.0 + - apache-airflow-providers-common-sql>=1.20.0 - psycopg2-binary>=2.9.4 additional-extras: diff --git a/providers/src/airflow/providers/presto/hooks/presto.py b/providers/src/airflow/providers/presto/hooks/presto.py index bd2436d5828a..adb4dffa6803 100644 --- a/providers/src/airflow/providers/presto/hooks/presto.py +++ b/providers/src/airflow/providers/presto/hooks/presto.py @@ -84,6 +84,7 @@ class PrestoHook(DbApiHook): default_conn_name = "presto_default" conn_type = "presto" hook_name = "Presto" + strip_semicolon = True def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/providers/src/airflow/providers/presto/provider.yaml b/providers/src/airflow/providers/presto/provider.yaml index 0cc9bc5a44d0..c6ce5ad40afb 100644 --- a/providers/src/airflow/providers/presto/provider.yaml +++ b/providers/src/airflow/providers/presto/provider.yaml @@ -62,7 +62,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - presto-python-client>=0.8.4 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies diff --git a/providers/src/airflow/providers/slack/provider.yaml b/providers/src/airflow/providers/slack/provider.yaml index 1444036923e7..597b01861748 100644 --- a/providers/src/airflow/providers/slack/provider.yaml +++ b/providers/src/airflow/providers/slack/provider.yaml @@ -66,7 +66,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - slack_sdk>=3.19.0 integrations: diff --git a/providers/src/airflow/providers/snowflake/provider.yaml b/providers/src/airflow/providers/snowflake/provider.yaml index 08a2ce861d47..a3bbe8992712 100644 --- a/providers/src/airflow/providers/snowflake/provider.yaml +++ b/providers/src/airflow/providers/snowflake/provider.yaml @@ -81,7 +81,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - apache-airflow-providers-common-compat>=1.1.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 diff --git a/providers/src/airflow/providers/sqlite/provider.yaml b/providers/src/airflow/providers/sqlite/provider.yaml index 586551285d83..4f00d3e6f437 100644 --- a/providers/src/airflow/providers/sqlite/provider.yaml +++ b/providers/src/airflow/providers/sqlite/provider.yaml @@ -56,7 +56,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 integrations: - integration-name: SQLite diff --git a/providers/src/airflow/providers/standard/provider.yaml b/providers/src/airflow/providers/standard/provider.yaml index d936d151ecd3..ba1a53e44bf8 100644 --- a/providers/src/airflow/providers/standard/provider.yaml +++ b/providers/src/airflow/providers/standard/provider.yaml @@ -29,7 +29,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.18.0 + - apache-airflow-providers-common-sql>=1.20.0 integrations: - integration-name: Standard diff --git a/providers/src/airflow/providers/teradata/provider.yaml b/providers/src/airflow/providers/teradata/provider.yaml index 3e304b7334b4..445c2606d331 100644 --- a/providers/src/airflow/providers/teradata/provider.yaml +++ b/providers/src/airflow/providers/teradata/provider.yaml @@ -36,7 +36,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - teradatasqlalchemy>=17.20.0.0 - teradatasql>=17.20.0.28 diff --git a/providers/src/airflow/providers/trino/hooks/trino.py b/providers/src/airflow/providers/trino/hooks/trino.py index 9e776361c147..e450c9b67e3d 100644 --- a/providers/src/airflow/providers/trino/hooks/trino.py +++ b/providers/src/airflow/providers/trino/hooks/trino.py @@ -86,6 +86,7 @@ class TrinoHook(DbApiHook): default_conn_name = "trino_default" conn_type = "trino" hook_name = "Trino" + strip_semicolon = True query_id = "" _test_connection_sql = "select 1" diff --git a/providers/src/airflow/providers/trino/provider.yaml b/providers/src/airflow/providers/trino/provider.yaml index 876244aa1c6a..a64fde3e2c8b 100644 --- a/providers/src/airflow/providers/trino/provider.yaml +++ b/providers/src/airflow/providers/trino/provider.yaml @@ -65,7 +65,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 diff --git a/providers/src/airflow/providers/vertica/provider.yaml b/providers/src/airflow/providers/vertica/provider.yaml index c33d202fb56b..e8bd4256b2de 100644 --- a/providers/src/airflow/providers/vertica/provider.yaml +++ b/providers/src/airflow/providers/vertica/provider.yaml @@ -54,7 +54,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.3.1 + - apache-airflow-providers-common-sql>=1.20.0 - vertica-python>=0.6.0 integrations: diff --git a/providers/src/airflow/providers/ydb/provider.yaml b/providers/src/airflow/providers/ydb/provider.yaml index f70a70b2834f..26ffd8f8f84c 100644 --- a/providers/src/airflow/providers/ydb/provider.yaml +++ b/providers/src/airflow/providers/ydb/provider.yaml @@ -33,7 +33,7 @@ versions: dependencies: - apache-airflow>=2.8.0 - - apache-airflow-providers-common-sql>=1.14.1 + - apache-airflow-providers-common-sql>=1.20.0 - ydb>=3.18.8 - ydb-dbapi>=0.1.0 diff --git a/providers/tests/presto/hooks/test_presto.py b/providers/tests/presto/hooks/test_presto.py index 2b9449eec23e..74ee349f9d82 100644 --- a/providers/tests/presto/hooks/test_presto.py +++ b/providers/tests/presto/hooks/test_presto.py @@ -276,6 +276,16 @@ def test_get_pandas_df(self): self.cur.execute.assert_called_once_with(statement, None) + def test_split_sql_string(self): + statement = "SELECT 1; SELECT 2" + result_sets = ["SELECT 1", "SELECT 2"] + self.cur.fetchall.return_value = result_sets + + assert result_sets == self.db_hook.split_sql_string( + sql=statement, + strip_semicolon=self.db_hook.strip_semicolon, + ) + def test_serialize_cell(self): assert "foo" == self.db_hook._serialize_cell("foo", None) assert 1 == self.db_hook._serialize_cell(1, None) diff --git a/providers/tests/trino/hooks/test_trino.py b/providers/tests/trino/hooks/test_trino.py index 312ae5ec5826..5f6e0961727d 100644 --- a/providers/tests/trino/hooks/test_trino.py +++ b/providers/tests/trino/hooks/test_trino.py @@ -345,6 +345,16 @@ def test_get_pandas_df(self): self.cur.execute.assert_called_once_with(statement, None) + def test_split_sql_string(self): + statement = "SELECT 1; SELECT 2" + result_sets = ["SELECT 1", "SELECT 2"] + self.cur.fetchall.return_value = result_sets + + assert result_sets == self.db_hook.split_sql_string( + sql=statement, + strip_semicolon=self.db_hook.strip_semicolon, + ) + @patch("airflow.providers.trino.hooks.trino.TrinoHook.run") def test_run(self, mock_run): sql = "SELECT 1"