Skip to content

Commit

Permalink
feat(trino,presto): add missing time grains (apache#30926)
Browse files Browse the repository at this point in the history
  • Loading branch information
villebro authored Nov 15, 2024
1 parent 824eaf8 commit e528cb4
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 2 deletions.
7 changes: 7 additions & 0 deletions superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,15 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta):
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))",
TimeGrain.FIVE_SECONDS: "date_trunc('second', CAST({col} AS TIMESTAMP)) - interval '1' second * (second(CAST({col} AS TIMESTAMP)) % 5)",
TimeGrain.THIRTY_SECONDS: "date_trunc('second', CAST({col} AS TIMESTAMP)) - interval '1' second * (second(CAST({col} AS TIMESTAMP)) % 30)",
TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))",
TimeGrain.FIVE_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 5)",
TimeGrain.TEN_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 10)",
TimeGrain.FIFTEEN_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 15)",
TimeGrain.HALF_HOUR: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 30)",
TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))",
TimeGrain.SIX_HOURS: "date_trunc('hour', CAST({col} AS TIMESTAMP)) - interval '1' hour * (hour(CAST({col} AS TIMESTAMP)) % 6)",
TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))",
TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))",
TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))",
Expand Down
65 changes: 64 additions & 1 deletion tests/unit_tests/db_engine_specs/test_presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pytest
import pytz
from pyhive.sqlalchemy_presto import PrestoDialect
from sqlalchemy import sql, text, types
from sqlalchemy import column, sql, text, types
from sqlalchemy.engine.url import make_url

from superset.sql_parse import Table
Expand Down Expand Up @@ -240,3 +240,66 @@ def test_get_default_catalog() -> None:
sqlalchemy_uri="presto://localhost:8080/hive/default",
)
assert PrestoEngineSpec.get_default_catalog(database) == "hive"


@pytest.mark.parametrize(
"time_grain,expected_result",
[
("PT1S", "date_trunc('second', CAST(col AS TIMESTAMP))"),
(
"PT5S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT30S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1M", "date_trunc('minute', CAST(col AS TIMESTAMP))"),
(
"PT5M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT10M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 10)",
),
(
"PT15M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 15)",
),
(
"PT0.5H",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1H", "date_trunc('hour', CAST(col AS TIMESTAMP))"),
(
"PT6H",
"date_trunc('hour', CAST(col AS TIMESTAMP)) - interval '1' hour * (hour(CAST(col AS TIMESTAMP)) % 6)",
),
("P1D", "date_trunc('day', CAST(col AS TIMESTAMP))"),
("P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
("P1M", "date_trunc('month', CAST(col AS TIMESTAMP))"),
("P3M", "date_trunc('quarter', CAST(col AS TIMESTAMP))"),
("P1Y", "date_trunc('year', CAST(col AS TIMESTAMP))"),
(
"1969-12-28T00:00:00Z/P1W",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) - interval '1' day",
),
("1969-12-29T00:00:00Z/P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
(
"P1W/1970-01-03T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) + interval '5' day",
),
(
"P1W/1970-01-04T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP)) + interval '6' day",
),
],
)
def test_timegrain_expressions(time_grain: str, expected_result: str) -> None:
from superset.db_engine_specs.presto import PrestoEngineSpec as spec

actual = str(
spec.get_timestamp_expr(col=column("col"), pdf=None, time_grain=time_grain)
)
assert actual == expected_result
65 changes: 64 additions & 1 deletion tests/unit_tests/db_engine_specs/test_trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from flask import g, has_app_context
from pytest_mock import MockerFixture
from requests.exceptions import ConnectionError as RequestsConnectionError
from sqlalchemy import sql, text, types
from sqlalchemy import column, sql, text, types
from sqlalchemy.dialects import sqlite
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import NoSuchTableError
Expand Down Expand Up @@ -847,3 +847,66 @@ def test_get_oauth2_token(
},
timeout=30.0,
)


@pytest.mark.parametrize(
"time_grain,expected_result",
[
("PT1S", "date_trunc('second', CAST(col AS TIMESTAMP))"),
(
"PT5S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT30S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1M", "date_trunc('minute', CAST(col AS TIMESTAMP))"),
(
"PT5M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT10M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 10)",
),
(
"PT15M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 15)",
),
(
"PT0.5H",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1H", "date_trunc('hour', CAST(col AS TIMESTAMP))"),
(
"PT6H",
"date_trunc('hour', CAST(col AS TIMESTAMP)) - interval '1' hour * (hour(CAST(col AS TIMESTAMP)) % 6)",
),
("P1D", "date_trunc('day', CAST(col AS TIMESTAMP))"),
("P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
("P1M", "date_trunc('month', CAST(col AS TIMESTAMP))"),
("P3M", "date_trunc('quarter', CAST(col AS TIMESTAMP))"),
("P1Y", "date_trunc('year', CAST(col AS TIMESTAMP))"),
(
"1969-12-28T00:00:00Z/P1W",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) - interval '1' day",
),
("1969-12-29T00:00:00Z/P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
(
"P1W/1970-01-03T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) + interval '5' day",
),
(
"P1W/1970-01-04T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP)) + interval '6' day",
),
],
)
def test_timegrain_expressions(time_grain: str, expected_result: str) -> None:
from superset.db_engine_specs.trino import TrinoEngineSpec as spec

actual = str(
spec.get_timestamp_expr(col=column("col"), pdf=None, time_grain=time_grain)
)
assert actual == expected_result

0 comments on commit e528cb4

Please sign in to comment.