Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arrivals_departures/connections: move id column into separate view #66

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions benchmark/index.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
BEGIN;
CREATE TEMP TABLE _benchmark (
dbname TEXT,
filename TEXT,
query TEXT,
avg FLOAT,
Expand Down Expand Up @@ -44,6 +45,7 @@ BEGIN

INSERT INTO _benchmark
SELECT
current_database() AS dbname,
_filename,
_query,
round(avg(elapsed)::numeric, 0),
Expand Down
22 changes: 21 additions & 1 deletion benchmark/init.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

set -e
set -Eeu
set -o pipefail
cd "$(dirname "$0")"
set -x
Expand All @@ -10,6 +10,26 @@ ls -lh ../vbb-2022-07-01.gtfs

env | grep '^PG' || true

psql -c 'CREATE DATABASE benchmark_raw'
export PGDATABASE=benchmark_raw

../cli.js -d \
--stops-location-index --stats-by-route-date=view \
../vbb-2022-07-01.gtfs/*.csv | sponge | psql -b

./run.sh

# The VBB 2022-07-01 GTFS feed doesn't contain any frequencies rows. In order to benchmark the frequencies implementations, we use `--minimize-stoptimes` to generate an equivalent feed that includes frequencies.
gtfstidy --show-warnings \
--minimize-stoptimes \
-o ../vbb-2022-07-01.tidied.gtfs ../vbb-2022-07-01.gtfs
ls -lh ../vbb-2022-07-01.tidied.gtfs

psql -c 'CREATE DATABASE benchmark_tidied'
export PGDATABASE=benchmark_tidied

../cli.js -d \
--stops-location-index --stats-by-route-date=view \
../vbb-2022-07-01.tidied.gtfs/*.txt | sponge | psql -b

./run.sh
195 changes: 103 additions & 92 deletions lib/stop_times.js
Original file line number Diff line number Diff line change
Expand Up @@ -262,46 +262,13 @@ WITH stop_times_based AS NOT MATERIALIZED (
)
-- stop_times-based arrivals/departures
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
-- frequencies_row
|| ':' || encode('-1'::bytea, 'base64')
-- frequencies_it
|| ':' || encode('-1'::bytea, 'base64')
) as arrival_departure_id,

stop_times_based.*,
-- todo: expose local arrival/departure "wall clock time"?

-1 AS frequencies_row,
-1 AS frequencies_it
-1 AS frequencies_row
FROM stop_times_based
UNION ALL
-- frequencies-based arrivals/departures
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as arrival_departure_id,
*
FROM (
SELECT
*,
row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer AS frequencies_it
FROM (
SELECT
-- stop_times_based.* except t_arrival & t_departure, duh
-- todo: find a way to use all columns without explicitly enumerating them here
Expand Down Expand Up @@ -341,15 +308,52 @@ FROM (
FROM stop_times_based
JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id
WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct?
) t
) t
) frequencies_based;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit';
-- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […].
-- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs.
-- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off
COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
` : ''}

CREATE OR REPLACE VIEW "${opt.schema}".arrivals_departures_with_ids AS
SELECT
*,
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as arrival_departure_id
FROM (
SELECT
*,
(CASE WHEN frequencies_row = -1
THEN -1
ELSE row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer
END) AS frequencies_it
FROM "${opt.schema}".arrivals_departures
) t;

CREATE OR REPLACE FUNCTION "${opt.schema}".arrival_departure_by_arrival_departure_id(id TEXT)
RETURNS "${opt.schema}".arrivals_departures
RETURNS "${opt.schema}".arrivals_departures_with_ids
AS $$
SELECT *
FROM "${opt.schema}".arrivals_departures
FROM "${opt.schema}".arrivals_departures_with_ids
WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text
AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp
AND stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer
Expand All @@ -360,18 +364,19 @@ AS $$
$$ LANGUAGE SQL STABLE STRICT;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit';
-- todo: DRY with comments on arrivals_departures
-- todo: what is the graphql field name? postgraphile singularifies most names
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.station_name IS E'@omit';
-- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […].
-- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs.
-- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off
COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
COMMENT ON VIEW "${opt.schema}".arrivals_departures_with_ids IS E'@name arrivals_departures_with_ids\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
` : ''}

CREATE OR REPLACE VIEW "${opt.schema}".connections AS
Expand Down Expand Up @@ -503,40 +508,13 @@ WITH stop_times_based AS NOT MATERIALIZED (
)
-- stop_times-based connections
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
-- frequencies_row
|| ':' || encode('-1'::bytea, 'base64')
-- frequencies_it
|| ':' || encode('-1'::bytea, 'base64')
) as connection_id,

stop_times_based.*,

-1 AS frequencies_row,
-1 AS frequencies_it
-1 AS frequencies_row
FROM stop_times_based
UNION ALL
-- frequencies-based connections
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as connection_id,

-- stop_times_based.* except t_arrival & t_departure, duh
-- todo: find a way to use all columns without explicitly enumerating them here
route_id, route_short_name, route_long_name, route_type,
Expand Down Expand Up @@ -586,26 +564,60 @@ SELECT
to_station_name,
to_wheelchair_boarding,

frequencies_row,
frequencies_it
frequencies_row
FROM (
SELECT
stop_times_based.*,
frequencies.start_time,
frequencies.end_time,
frequencies.headway_secs,
frequencies_row,
(row_number() OVER (PARTITION BY stop_times_based.trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it
frequencies_row
FROM stop_times_based
JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id
WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct?
) frequencies_based;

${opt.postgraphile ? `\
-- todo: allow filtering based on stop and/or route and/or trip and/or time frame
-- https://www.graphile.org/postgraphile/functions/#setof-functions---connections
COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
` : ''}

CREATE VIEW "${opt.schema}".connections_with_ids AS
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as connection_id,
*
FROM (
SELECT
*,
(row_number() OVER (PARTITION BY trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it
FROM "${opt.schema}".connections
) t;

CREATE OR REPLACE FUNCTION "${opt.schema}".connection_by_connection_id(id TEXT)
RETURNS "${opt.schema}".connections
RETURNS "${opt.schema}".connections_with_ids
AS $$
SELECT *
FROM "${opt.schema}".connections
FROM "${opt.schema}".connections_with_ids
WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text
AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp
AND from_stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer
Expand All @@ -616,19 +628,18 @@ AS $$
$$ LANGUAGE SQL STABLE STRICT;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
-- todo: allow filtering based on stop and/or route and/or trip and/or time frame
-- https://www.graphile.org/postgraphile/functions/#setof-functions---connections
COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections_with_ids IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
` : ''}
`

Expand Down
Loading