diff --git a/benchmark/index.sql b/benchmark/index.sql index 9d0371f..8857cc5 100644 --- a/benchmark/index.sql +++ b/benchmark/index.sql @@ -1,5 +1,6 @@ BEGIN; CREATE TEMP TABLE _benchmark ( + dbname TEXT, filename TEXT, query TEXT, avg FLOAT, @@ -44,6 +45,7 @@ BEGIN INSERT INTO _benchmark SELECT + current_database() AS dbname, _filename, _query, round(avg(elapsed)::numeric, 0), diff --git a/benchmark/init.sh b/benchmark/init.sh index 1465b89..b57736b 100755 --- a/benchmark/init.sh +++ b/benchmark/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -e +set -Eeu set -o pipefail cd "$(dirname "$0")" set -x @@ -10,6 +10,26 @@ ls -lh ../vbb-2022-07-01.gtfs env | grep '^PG' || true +psql -c 'CREATE DATABASE benchmark_raw' +export PGDATABASE=benchmark_raw + ../cli.js -d \ --stops-location-index --stats-by-route-date=view \ ../vbb-2022-07-01.gtfs/*.csv | sponge | psql -b + +./run.sh + +# The VBB 2022-07-01 GTFS feed doesn't contain any frequencies rows. In order to benchmark the frequencies implementations, we use `--minimize-stoptimes` to generate an equivalent feed that includes frequencies. +gtfstidy --show-warnings \ + --minimize-stoptimes \ + -o ../vbb-2022-07-01.tidied.gtfs ../vbb-2022-07-01.gtfs +ls -lh ../vbb-2022-07-01.tidied.gtfs + +psql -c 'CREATE DATABASE benchmark_tidied' +export PGDATABASE=benchmark_tidied + +../cli.js -d \ + --stops-location-index --stats-by-route-date=view \ + ../vbb-2022-07-01.tidied.gtfs/*.txt | sponge | psql -b + +./run.sh diff --git a/lib/stop_times.js b/lib/stop_times.js index 2b0afdb..05a74f9 100644 --- a/lib/stop_times.js +++ b/lib/stop_times.js @@ -262,46 +262,13 @@ WITH stop_times_based AS NOT MATERIALIZED ( ) -- stop_times-based arrivals/departures SELECT - ( - encode(trip_id::bytea, 'base64') - || ':' || encode(( - extract(ISOYEAR FROM "date") - || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') - || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') - )::bytea, 'base64') - || ':' || encode((stop_sequence::text)::bytea, 'base64') - -- frequencies_row - || ':' || encode('-1'::bytea, 'base64') - -- frequencies_it - || ':' || encode('-1'::bytea, 'base64') - ) as arrival_departure_id, - stop_times_based.*, -- todo: expose local arrival/departure "wall clock time"? - -1 AS frequencies_row, - -1 AS frequencies_it + -1 AS frequencies_row FROM stop_times_based UNION ALL -- frequencies-based arrivals/departures -SELECT - ( - encode(trip_id::bytea, 'base64') - || ':' || encode(( - extract(ISOYEAR FROM "date") - || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') - || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') - )::bytea, 'base64') - || ':' || encode((stop_sequence::text)::bytea, 'base64') - || ':' || encode((frequencies_row::text)::bytea, 'base64') - || ':' || encode((frequencies_it::text)::bytea, 'base64') - ) as arrival_departure_id, - * -FROM ( -SELECT - *, - row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer AS frequencies_it -FROM ( SELECT -- stop_times_based.* except t_arrival & t_departure, duh -- todo: find a way to use all columns without explicitly enumerating them here @@ -341,15 +308,52 @@ FROM ( FROM stop_times_based JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct? -) t -) t ) frequencies_based; +${opt.postgraphile ? `\ +-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!) +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit'; +-- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […]. +-- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs. +-- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off +COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station'; +` : ''} + +CREATE OR REPLACE VIEW "${opt.schema}".arrivals_departures_with_ids AS +SELECT + *, + ( + encode(trip_id::bytea, 'base64') + || ':' || encode(( + extract(ISOYEAR FROM "date") + || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') + || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') + )::bytea, 'base64') + || ':' || encode((stop_sequence::text)::bytea, 'base64') + || ':' || encode((frequencies_row::text)::bytea, 'base64') + || ':' || encode((frequencies_it::text)::bytea, 'base64') + ) as arrival_departure_id +FROM ( + SELECT + *, + (CASE WHEN frequencies_row = -1 + THEN -1 + ELSE row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer + END) AS frequencies_it + FROM "${opt.schema}".arrivals_departures +) t; + CREATE OR REPLACE FUNCTION "${opt.schema}".arrival_departure_by_arrival_departure_id(id TEXT) -RETURNS "${opt.schema}".arrivals_departures +RETURNS "${opt.schema}".arrivals_departures_with_ids AS $$ SELECT * - FROM "${opt.schema}".arrivals_departures + FROM "${opt.schema}".arrivals_departures_with_ids WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp AND stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer @@ -360,18 +364,19 @@ AS $$ $$ LANGUAGE SQL STABLE STRICT; ${opt.postgraphile ? `\ --- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!) -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit'; +-- todo: DRY with comments on arrivals_departures +-- todo: what is the graphql field name? postgraphile singularifies most names +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_short_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_long_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_type IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.direction_id IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.trip_headsign IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.station_name IS E'@omit'; -- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […]. -- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs. -- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off -COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station'; +COMMENT ON VIEW "${opt.schema}".arrivals_departures_with_ids IS E'@name arrivals_departures_with_ids\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station'; ` : ''} CREATE OR REPLACE VIEW "${opt.schema}".connections AS @@ -503,40 +508,13 @@ WITH stop_times_based AS NOT MATERIALIZED ( ) -- stop_times-based connections SELECT - ( - encode(trip_id::bytea, 'base64') - || ':' || encode(( - extract(ISOYEAR FROM "date") - || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') - || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') - )::bytea, 'base64') - || ':' || encode((from_stop_sequence::text)::bytea, 'base64') - -- frequencies_row - || ':' || encode('-1'::bytea, 'base64') - -- frequencies_it - || ':' || encode('-1'::bytea, 'base64') - ) as connection_id, - stop_times_based.*, - -1 AS frequencies_row, - -1 AS frequencies_it + -1 AS frequencies_row FROM stop_times_based UNION ALL -- frequencies-based connections SELECT - ( - encode(trip_id::bytea, 'base64') - || ':' || encode(( - extract(ISOYEAR FROM "date") - || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') - || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') - )::bytea, 'base64') - || ':' || encode((from_stop_sequence::text)::bytea, 'base64') - || ':' || encode((frequencies_row::text)::bytea, 'base64') - || ':' || encode((frequencies_it::text)::bytea, 'base64') - ) as connection_id, - -- stop_times_based.* except t_arrival & t_departure, duh -- todo: find a way to use all columns without explicitly enumerating them here route_id, route_short_name, route_long_name, route_type, @@ -586,26 +564,60 @@ SELECT to_station_name, to_wheelchair_boarding, - frequencies_row, - frequencies_it + frequencies_row FROM ( SELECT stop_times_based.*, frequencies.start_time, frequencies.end_time, frequencies.headway_secs, - frequencies_row, - (row_number() OVER (PARTITION BY stop_times_based.trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it + frequencies_row FROM stop_times_based JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct? ) frequencies_based; +${opt.postgraphile ? `\ +-- todo: allow filtering based on stop and/or route and/or trip and/or time frame +-- https://www.graphile.org/postgraphile/functions/#setof-functions---connections +COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit'; +COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation'; +` : ''} + +CREATE VIEW "${opt.schema}".connections_with_ids AS +SELECT + ( + encode(trip_id::bytea, 'base64') + || ':' || encode(( + extract(ISOYEAR FROM "date") + || '-' || lpad(extract(MONTH FROM "date")::text, 2, '0') + || '-' || lpad(extract(DAY FROM "date")::text, 2, '0') + )::bytea, 'base64') + || ':' || encode((from_stop_sequence::text)::bytea, 'base64') + || ':' || encode((frequencies_row::text)::bytea, 'base64') + || ':' || encode((frequencies_it::text)::bytea, 'base64') + ) as connection_id, + * +FROM ( + SELECT + *, + (row_number() OVER (PARTITION BY trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it + FROM "${opt.schema}".connections +) t; + CREATE OR REPLACE FUNCTION "${opt.schema}".connection_by_connection_id(id TEXT) -RETURNS "${opt.schema}".connections +RETURNS "${opt.schema}".connections_with_ids AS $$ SELECT * - FROM "${opt.schema}".connections + FROM "${opt.schema}".connections_with_ids WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp AND from_stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer @@ -616,19 +628,18 @@ AS $$ $$ LANGUAGE SQL STABLE STRICT; ${opt.postgraphile ? `\ --- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!) -- todo: allow filtering based on stop and/or route and/or trip and/or time frame -- https://www.graphile.org/postgraphile/functions/#setof-functions---connections -COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit'; -COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit'; -COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_short_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_long_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_type IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.direction_id IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.trip_headsign IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_station_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_stop_name IS E'@omit'; +COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_station_name IS E'@omit'; +COMMENT ON VIEW "${opt.schema}".connections_with_ids IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation'; ` : ''} `