diff --git a/docs/dev/spatial-data.md b/docs/dev/spatial-data.md index c9bb27074..254c5b1b5 100644 --- a/docs/dev/spatial-data.md +++ b/docs/dev/spatial-data.md @@ -35,3 +35,37 @@ data into. ```sh ./scripts/load-spatial-data.sh ``` + +## Updating geospatial tables + +Our geospatial data is managed by a set of utility scripts in the `spatial-data` +directory at the root of the project. Each spatial data source is represented by +a JavaScript file in the `spatial-data/sources` directory. This JavaScript file +includes code for creating and updating schema versions as well as code for +loading the actual data into the database. + +A source script file should export an object that looks roughly like this: + +```js + table: <table name>, + schemas: { + <version number>: async function() {}, + <version number>: async function() {}, + }, + loadData: async function() {} +``` + +The `table` property is the name of the table used by the source. + +The `schemas` property is an object whose keys are integers representing schema +versions. The keys are used to determine the list of schema upgrades necessary +for a given table. The values are functions that are called if a given schema +upgrade is necessary. A schema upgrade function should return `true` if the +source data needs to be reloaded or `false` if only a schema change is +necessary. If there is an update that _only_ requires reloading data, there +should still be a new version created in the `schemas` property, but its +function should simply return `true`. + +The `loadData` function is called if any necessary schema upgrades also need +data to be loaded/reloaded. The `loadData` function should assume that the schema is the +most recent version and should not need to do any schema consistency checks. 
diff --git a/spatial-data/lib/meta.js b/spatial-data/lib/meta.js index 6b4161b9c..2cb2153b7 100644 --- a/spatial-data/lib/meta.js +++ b/spatial-data/lib/meta.js @@ -1,16 +1,18 @@ const { openDatabase } = require("./db.js"); -const { metadata: counties } = require("../sources/counties.js"); -const { metadata: cwas } = require("../sources/countyWarningAreas.js"); -const { metadata: places } = require("../sources/places.js"); -const { metadata: states } = require("../sources/states.js"); -const { metadata: zones } = require("../sources/zones.js"); +const counties = require("../sources/counties.js"); +const cwas = require("../sources/countyWarningAreas.js"); +const places = require("../sources/places.js"); +const states = require("../sources/states.js"); +const zones = require("../sources/zones.js"); +// These should be in dependency order. That is, if any table depends on another +// table, the dependent table should be listed *after* its dependency. const targets = { + states, counties, cwas, places, - states, zones, }; @@ -43,10 +45,16 @@ module.exports = async () => { const results = {}; for (const [target, metadata] of Object.entries(targets)) { const databaseVersion = +(existing[metadata.table] ?? 0); - const currentVersion = metadata?.version ?? 
0; + + const currentVersion = Math.max( + ...Object.keys(metadata?.schemas).map((v) => +v), + ); results[target] = { update: currentVersion > databaseVersion, + from: databaseVersion, + to: currentVersion, + metadata, }; } @@ -60,14 +68,17 @@ module.exports.update = async () => { for await (const [source, metadata] of Object.entries(targets)) { if (meta[source].update) { - console.log(`setting ${metadata.table} to version ${metadata.version}`); + const currentVersion = Math.max( + ...Object.keys(metadata?.schemas).map((v) => +v), + ); + console.log(`setting ${metadata.table} to version ${currentVersion}`); // UPSERT query, essentially const sql = `INSERT INTO weathergov_geo_metadata (table_name, version) - VALUES("${metadata.table}", "${metadata.version}") + VALUES("${metadata.table}", "${currentVersion}") ON DUPLICATE KEY - UPDATE version="${metadata.version}"`; + UPDATE version="${currentVersion}"`; await db.query(sql); } } diff --git a/spatial-data/lib/schema.js b/spatial-data/lib/schema.js new file mode 100644 index 000000000..c023c0839 --- /dev/null +++ b/spatial-data/lib/schema.js @@ -0,0 +1,19 @@ +module.exports = async ({ from, metadata: { table, schemas } }) => { + const schemaVersions = Object.keys(schemas).filter( + (version) => +version > from, + ); + const upgrades = [...Array(schemaVersions.length)].map( + // Plus one because our schema versions are 1-based, not 0-based. 
+ (_, i) => i + from + 1, + ); + + let needsDataUpdate = false; + for await (const version of upgrades) { + console.log(` upgrading ${table} schema to version ${version}`); + const versionNeedsDataUpdate = await schemas[version](); + + needsDataUpdate = needsDataUpdate || versionNeedsDataUpdate; + } + + return needsDataUpdate; +}; diff --git a/spatial-data/load-shapefiles.js b/spatial-data/load-shapefiles.js index 47ba33bb8..2b0e2013c 100644 --- a/spatial-data/load-shapefiles.js +++ b/spatial-data/load-shapefiles.js @@ -1,11 +1,7 @@ const { downloadAndUnzip, unzip } = require("./lib/prep.js"); const metadata = require("./lib/meta.js"); -const loadCounties = require("./sources/counties.js"); -const loadCWAs = require("./sources/countyWarningAreas.js"); -const loadPlaces = require("./sources/places.js"); -const loadStates = require("./sources/states.js"); -const loadZones = require("./sources/zones.js"); +const updateSchema = require("./lib/schema.js"); async function main() { const meta = await metadata(); @@ -29,6 +25,7 @@ async function main() { const zips = []; for (const [target, { update }] of Object.entries(meta)) { + console.log(`Fetching data for ${target}...`); if (update) { if (dataUrls[target]) { urls.push(...dataUrls[target]); @@ -37,7 +34,7 @@ async function main() { zips.push(...dataZips[target]); } } else { - console.log(`${target} already up-to-date; skipping`); + console.log(` already up-to-date; skipping`); } } @@ -49,20 +46,15 @@ async function main() { await unzip(zip); } - if (meta.states.update) { - await loadStates(); - } - if (meta.counties.update) { - await loadCounties(); - } - if (meta.cwas.update) { - await loadCWAs(); - } - if (meta.zones.update) { - await loadZones(); - } - if (meta.places.update) { - await loadPlaces(); + for await (const [source, sourceMetadata] of Object.entries(meta)) { + if (sourceMetadata.update) { + console.log(`${source} needs updating...`); + const importData = await updateSchema(sourceMetadata); + if 
(importData) { + console.log(` ${source} requires data loading...`); + await sourceMetadata.metadata.loadData(); + } + } } await metadata.update(); diff --git a/spatial-data/sources/counties.js b/spatial-data/sources/counties.js index 94e0fb3b5..9d661bc91 100644 --- a/spatial-data/sources/counties.js +++ b/spatial-data/sources/counties.js @@ -1,38 +1,44 @@ const shapefile = require("shapefile"); - +const { table: statesTable } = require("./states.js"); const { dropIndexIfExists, openDatabase } = require("../lib/db.js"); const metadata = { table: "weathergov_geo_counties", - version: 1, }; -module.exports = async () => { - console.log("loading counties..."); - const db = await openDatabase(); +const schemas = { + 1: async () => { + const db = await openDatabase(); + + await db.query( + `CREATE TABLE IF NOT EXISTS + ${metadata.table} + ( + id int NOT NULL AUTO_INCREMENT PRIMARY KEY, + state VARCHAR(2), + stateName TEXT, + stateFips VARCHAR(2), + countyName TEXT, + countyFips VARCHAR(5), + timezone TEXT, + dst BOOLEAN, + shape MULTIPOLYGON NOT NULL + )`, + ); + + await db.end(); + return true; + }, +}; + +const loadData = async () => { + console.log(" loading counties data"); + + const db = await openDatabase(); const file = await shapefile.open(`./c_05mr24.shp`); - await db.query( - `CREATE TABLE IF NOT EXISTS - ${metadata.table} - ( - id int NOT NULL AUTO_INCREMENT PRIMARY KEY, - state VARCHAR(2), - stateName TEXT, - stateFips VARCHAR(2), - countyName TEXT, - countyFips VARCHAR(5), - timezone TEXT, - dst BOOLEAN, - shape MULTIPOLYGON NOT NULL - )`, - ); - await dropIndexIfExists( - db, - "counties_spatial_idx", - "weathergov_geo_counties", - ); + await dropIndexIfExists(db, "counties_spatial_idx", metadata.table); const shapeTzToIANA = new Map([ ["V", "America/Puerto_Rico"], @@ -47,8 +53,8 @@ module.exports = async () => { ["S", "Pacific/Pago_Pago"], ]); - await db.query("TRUNCATE TABLE weathergov_geo_counties"); - await db.query("ALTER TABLE weathergov_geo_counties 
AUTO_INCREMENT=0"); + await db.query(`TRUNCATE TABLE ${metadata.table}`); + await db.query(`ALTER TABLE ${metadata.table} AUTO_INCREMENT=0`); const getSqlForShape = async ({ done, value }) => { if (done) { @@ -71,7 +77,7 @@ module.exports = async () => { const observesDST = tz.toUpperCase() === tz; await db.query( - `INSERT INTO weathergov_geo_counties + `INSERT INTO ${metadata.table} (state, countyName, countyFips, timezone, dst, shape) VALUES( '${state}', @@ -91,25 +97,25 @@ module.exports = async () => { // Once we've got all the counties loaded, grab the associated full state // names and state FIPS codes from the states table. await db.query( - `UPDATE weathergov_geo_counties c + `UPDATE ${metadata.table} c SET stateName=( - SELECT name FROM weathergov_geo_states s + SELECT name FROM ${statesTable} s WHERE s.state=c.state ), stateFips=( - SELECT fips FROM weathergov_geo_states s + SELECT fips FROM ${statesTable} s WHERE s.state=c.state )`, ); await db.query( - "CREATE SPATIAL INDEX counties_spatial_idx ON weathergov_geo_counties(shape)", + `CREATE SPATIAL INDEX counties_spatial_idx ON ${metadata.table}(shape)`, ); db.end(); }; -module.exports.metadata = metadata; +module.exports = { ...metadata, schemas, loadData }; diff --git a/spatial-data/sources/countyWarningAreas.js b/spatial-data/sources/countyWarningAreas.js index 2db60cbf9..9f899b045 100644 --- a/spatial-data/sources/countyWarningAreas.js +++ b/spatial-data/sources/countyWarningAreas.js @@ -4,29 +4,38 @@ const { dropIndexIfExists, openDatabase } = require("../lib/db.js"); const metadata = { table: "weathergov_geo_cwas", - version: 1, }; -module.exports = async () => { - console.log("loading WFOs..."); +const schemas = { + 1: async () => { + const db = await openDatabase(); + + await db.query(` + CREATE TABLE IF NOT EXISTS + ${metadata.table} + ( + id int NOT NULL AUTO_INCREMENT PRIMARY KEY, + wfo VARCHAR(3), + cwa VARCHAR(3), + region VARCHAR(2), + city VARCHAR(50), + state VARCHAR(50), + st 
VARCHAR(2), + shape MULTIPOLYGON NOT NULL + )`); + + await db.end(); + + return true; + }, +}; + +const loadData = async () => { + console.log(" loading WFOs/CWAs data"); const db = await openDatabase(); const file = await shapefile.open(`./w_05mr24.shp`); - await db.query(` -CREATE TABLE IF NOT EXISTS - ${metadata.table} - ( - id int NOT NULL AUTO_INCREMENT PRIMARY KEY, - wfo VARCHAR(3), - cwa VARCHAR(3), - region VARCHAR(2), - city VARCHAR(50), - state VARCHAR(50), - st VARCHAR(2), - shape MULTIPOLYGON NOT NULL -)`); - await dropIndexIfExists(db, "cwas_spatial_idx", "weathergov_geo_cwas"); await db.query("TRUNCATE TABLE weathergov_geo_cwas"); await db.query("ALTER TABLE weathergov_geo_cwas AUTO_INCREMENT=0"); @@ -79,4 +88,4 @@ CREATE TABLE IF NOT EXISTS db.end(); }; -module.exports.metadata = metadata; +module.exports = { ...metadata, schemas, loadData }; diff --git a/spatial-data/sources/places.js b/spatial-data/sources/places.js index 8a8ffc10e..22c2b6cb8 100644 --- a/spatial-data/sources/places.js +++ b/spatial-data/sources/places.js @@ -5,11 +5,36 @@ const { US_CODES } = require("../lib/util.js"); const metadata = { table: "weathergov_geo_places", - version: 1, }; -module.exports = async () => { - console.log("loading places..."); +const schemas = { + 1: async () => { + const db = await openDatabase(); + + await db.query( + `CREATE TABLE IF NOT EXISTS + ${metadata.table} + ( + id int NOT NULL AUTO_INCREMENT PRIMARY KEY, + name TEXT, + state TEXT, + stateName TEXT, + stateFIPS VARCHAR(2), + county TEXT, + countyFIPS VARCHAR(5), + timezone TEXT, + point POINT NOT NULL + )`, + ); + + await db.end(); + + return true; + }, +}; + +const loadData = async () => { + console.log(" loading places data"); const parameters = [ "undefined", @@ -123,21 +148,6 @@ module.exports = async () => { }), ); - await db.query( - `CREATE TABLE IF NOT EXISTS - ${metadata.table} - ( - id int NOT NULL AUTO_INCREMENT PRIMARY KEY, - name TEXT, - state TEXT, - stateName TEXT, - stateFIPS 
VARCHAR(2), - county TEXT, - countyFIPS VARCHAR(5), - timezone TEXT, - point POINT NOT NULL - )`, - ); await dropIndexIfExists(db, "places_spatial_idx", "weathergov_geo_places"); await db.query("TRUNCATE TABLE weathergov_geo_places"); await db.query("ALTER TABLE weathergov_geo_places AUTO_INCREMENT=0"); @@ -147,7 +157,7 @@ module.exports = async () => { // If the place is in one of the US // territories, we use the country code // for that territory as the state - let state = place.state; + let { state } = place; if (place.country !== "US") { state = place.country; } @@ -205,4 +215,4 @@ module.exports = async () => { db.end(); }; -module.exports.metadata = metadata; +module.exports = { ...metadata, schemas, loadData }; diff --git a/spatial-data/sources/states.js b/spatial-data/sources/states.js index 4e187a510..5a130558d 100644 --- a/spatial-data/sources/states.js +++ b/spatial-data/sources/states.js @@ -4,27 +4,36 @@ const { dropIndexIfExists, openDatabase } = require("../lib/db.js"); const metadata = { table: "weathergov_geo_states", - version: 1, }; -module.exports = async () => { - console.log("loading states..."); +const schemas = { + 1: async () => { + const db = await openDatabase(); + + await db.query( + `CREATE TABLE IF NOT EXISTS + ${metadata.table} + ( + id int NOT NULL AUTO_INCREMENT PRIMARY KEY, + state VARCHAR(2), + name TEXT, + fips VARCHAR(2), + shape MULTIPOLYGON NOT NULL + )`, + ); + + await db.end(); + + return true; + }, +}; + +const loadData = async () => { + console.log(" loading states data"); const db = await openDatabase(); const file = await shapefile.open(`./s_05mr24.shp`); - await db.query( - `CREATE TABLE IF NOT EXISTS - ${metadata.table} - ( - id int NOT NULL AUTO_INCREMENT PRIMARY KEY, - state VARCHAR(2), - name TEXT, - fips VARCHAR(2), - shape MULTIPOLYGON NOT NULL - )`, - ); - await dropIndexIfExists(db, "states_spatial_idx", "weathergov_geo_states"); await db.query("TRUNCATE TABLE weathergov_geo_states"); @@ -70,4 +79,4 @@ 
module.exports = async () => { db.end(); }; -module.exports.metadata = metadata; +module.exports = { ...metadata, schemas, loadData }; diff --git a/spatial-data/sources/zones.js b/spatial-data/sources/zones.js index 8ee5b131e..63e0a6ef1 100644 --- a/spatial-data/sources/zones.js +++ b/spatial-data/sources/zones.js @@ -1,37 +1,64 @@ -const fs = require("node:fs/promises"); const shapefile = require("shapefile"); const { dropIndexIfExists, openDatabase } = require("../lib/db.js"); const metadata = { table: "weathergov_geo_zones", - version: 2, }; -module.exports = async () => { - console.log("loading zones..."); - const db = await openDatabase(); +const schemas = { + 1: async () => { + const db = await openDatabase(); - await db.query( - `CREATE TABLE IF NOT EXISTS - ${metadata.table} - ( - id varchar(45) NOT NULL PRIMARY KEY, - state VARCHAR(2), - shape MULTIPOLYGON NOT NULL - )`, - ); + await db.query( + `CREATE TABLE IF NOT EXISTS + ${metadata.table} + ( + id varchar(45) NOT NULL PRIMARY KEY, + state VARCHAR(2), + shape MULTIPOLYGON NOT NULL + )`, + ); + await db.end(); + return true; + }, + + // Changes the shape column type, so the data must be reloaded afterwards. + 2: async () => { + const db = await openDatabase(); + // Version 2: Change the shape column into a collection rather than a single + // multipolygon. This allows us to capture all of the polygons for a zone as + // a collection rather than trying to collect or union them into one entity. + await db.query( + `ALTER TABLE ${metadata.table} MODIFY shape GEOMETRYCOLLECTION`, + ); + await db.end(); + + return true; + }, + + 3: async () => { + const db = await openDatabase(); + await db.query( + `ALTER TABLE ${metadata.table} MODIFY shape GEOMETRYCOLLECTION NOT NULL`, + ); + await db.query( + `CREATE SPATIAL INDEX zones_spatial_idx ON ${metadata.table}(shape)`, + ); + await db.end(); + + // No data load needed, just creating the index. 
+ return false; + }, +}; + +const loadData = async () => { + console.log(" loading zones data"); + const db = await openDatabase(); await dropIndexIfExists(db, "zones_spatial_idx", metadata.table); await db.query(`TRUNCATE TABLE ${metadata.table}`); - // Version 2: Change the shape column into a collection rather than a single - // multipolygon. This allows us to capture all of the polygons for a zone as - // a collection rather than trying to collect or union them into one entity. - await db.query( - `ALTER TABLE ${metadata.table} MODIFY shape GEOMETRYCOLLECTION`, - ); - const found = new Map(); const processFile = async (filename, zoneType) => { @@ -95,7 +122,11 @@ module.exports = async () => { ); } + await db.query( + `CREATE SPATIAL INDEX zones_spatial_idx ON ${metadata.table}(shape)`, + ); + db.end(); }; -module.exports.metadata = metadata; +module.exports = { ...metadata, schemas, loadData };