Skip to content

Commit

Permalink
Merge pull request #1492 from weather-gov/mgwalker/1490-spatial-table…
Browse files Browse the repository at this point in the history
…-keys

Update how spatial schemas are managed; update zone schema to add spatial index
  • Loading branch information
greg-does-weather authored Jul 31, 2024
2 parents 9e14e0f + ede886d commit 73f632a
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 139 deletions.
34 changes: 34 additions & 0 deletions docs/dev/spatial-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,37 @@ data into.
```sh
./scripts/load-spatial-data.sh <environment>
```

## Updating geospatial tables

Our geospatial data is managed by a set of utility scripts in the `spatial-data`
directory at the root of the project. Each spatial data source is represented by
a JavaScript file in the `spatial-data/sources` directory. This JavaScript file
includes code for creating and updating schema versions as well as code for
loading the actual data into the database.

A source script file should export an object that looks roughly like this:

```js
table: <string>,
schemas: {
<version | int>: async function() <bool> {},
<version | int>: async function() <bool> {},
},
loadData: async function() {}
```

The `table` property is the name of the table used by the source.

The `schemas` property is an object whose keys are integers representing schema
versions. The keys are used to determine the list of schema upgrades necessary
for a given table. The values are functions that are called if a given schema
upgrade is necessary. A schema upgrade function should return `true` if the
source data needs to be reloaded or `false` if only a schema change is
necessary. If there is an update that _only_ requires reloading data, there
should still be a new version created in the `schemas` property, but its
function should simply return `true`.

The `loadData` function is called if any necessary schema upgrades also need
data to be loaded/reloaded. The `loadData` function should assume that the
schema is the most recent version and should not need to do any schema
consistency checks.
31 changes: 21 additions & 10 deletions spatial-data/lib/meta.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
const { openDatabase } = require("./db.js");

const { metadata: counties } = require("../sources/counties.js");
const { metadata: cwas } = require("../sources/countyWarningAreas.js");
const { metadata: places } = require("../sources/places.js");
const { metadata: states } = require("../sources/states.js");
const { metadata: zones } = require("../sources/zones.js");
const counties = require("../sources/counties.js");
const cwas = require("../sources/countyWarningAreas.js");
const places = require("../sources/places.js");
const states = require("../sources/states.js");
const zones = require("../sources/zones.js");

// These should be in dependency order. That is, if any table depends on another
// table, the dependent table should be listed *after* its dependency.
const targets = {
states,
counties,
cwas,
places,
states,
zones,
};

Expand Down Expand Up @@ -43,10 +45,16 @@ module.exports = async () => {
const results = {};
for (const [target, metadata] of Object.entries(targets)) {
const databaseVersion = +(existing[metadata.table] ?? 0);
const currentVersion = metadata?.version ?? 0;

const currentVersion = Math.max(
...Object.keys(metadata?.schemas).map((v) => +v),
);

results[target] = {
update: currentVersion > databaseVersion,
from: databaseVersion,
to: currentVersion,
metadata,
};
}

Expand All @@ -60,14 +68,17 @@ module.exports.update = async () => {

for await (const [source, metadata] of Object.entries(targets)) {
if (meta[source].update) {
console.log(`setting ${metadata.table} to version ${metadata.version}`);
const currentVersion = Math.max(
...Object.keys(metadata?.schemas).map((v) => +v),
);
console.log(`setting ${metadata.table} to version ${currentVersion}`);

// UPSERT query, essentially
const sql = `INSERT INTO weathergov_geo_metadata
(table_name, version)
VALUES("${metadata.table}", "${metadata.version}")
VALUES("${metadata.table}", "${currentVersion}")
ON DUPLICATE KEY
UPDATE version="${metadata.version}"`;
UPDATE version="${currentVersion}"`;
await db.query(sql);
}
}
Expand Down
19 changes: 19 additions & 0 deletions spatial-data/lib/schema.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module.exports = async ({ from, metadata: { table, schemas } }) => {
const schemaVersions = Object.keys(schemas).filter(
(version) => +version > from,
);
const upgrades = [...Array(schemaVersions.length)].map(
// Plus one because our schema versions are 1-based, not 0-based.
(_, i) => i + from + 1,
);

let needsDataUpdate = false;
for await (const version of upgrades) {
console.log(` upgrading ${table} schema to version ${version}`);
const versionNeedsDataUpdate = await schemas[version]();

needsDataUpdate = needsDataUpdate || versionNeedsDataUpdate;
}

return needsDataUpdate;
};
32 changes: 12 additions & 20 deletions spatial-data/load-shapefiles.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
const { downloadAndUnzip, unzip } = require("./lib/prep.js");

const metadata = require("./lib/meta.js");
const loadCounties = require("./sources/counties.js");
const loadCWAs = require("./sources/countyWarningAreas.js");
const loadPlaces = require("./sources/places.js");
const loadStates = require("./sources/states.js");
const loadZones = require("./sources/zones.js");
const updateSchema = require("./lib/schema.js");

async function main() {
const meta = await metadata();
Expand All @@ -29,6 +25,7 @@ async function main() {
const zips = [];

for (const [target, { update }] of Object.entries(meta)) {
console.log(`Fetching data for ${target}...`);
if (update) {
if (dataUrls[target]) {
urls.push(...dataUrls[target]);
Expand All @@ -37,7 +34,7 @@ async function main() {
zips.push(...dataZips[target]);
}
} else {
console.log(`${target} already up-to-date; skipping`);
console.log(` already up-to-date; skipping`);
}
}

Expand All @@ -49,20 +46,15 @@ async function main() {
await unzip(zip);
}

if (meta.states.update) {
await loadStates();
}
if (meta.counties.update) {
await loadCounties();
}
if (meta.cwas.update) {
await loadCWAs();
}
if (meta.zones.update) {
await loadZones();
}
if (meta.places.update) {
await loadPlaces();
for await (const [source, sourceMetadata] of Object.entries(meta)) {
if (sourceMetadata.update) {
console.log(`${source} needs updating...`);
const importData = await updateSchema(sourceMetadata);
if (importData) {
console.log(` ${source} requires data loading...`);
await sourceMetadata.metadata.loadData();
}
}
}

await metadata.update();
Expand Down
72 changes: 39 additions & 33 deletions spatial-data/sources/counties.js
Original file line number Diff line number Diff line change
@@ -1,38 +1,44 @@
const shapefile = require("shapefile");

const { table: statesTable } = require("./states.js");
const { dropIndexIfExists, openDatabase } = require("../lib/db.js");

const metadata = {
table: "weathergov_geo_counties",
version: 1,
};

module.exports = async () => {
console.log("loading counties...");
const db = await openDatabase();
// Schema upgrade functions for the counties table, keyed by integer schema
// version. Each function returns true when the source data must be reloaded
// after the upgrade, or false when only the schema changed.
const schemas = {
  // Version 1: create the counties table if it does not already exist.
  1: async () => {
    const db = await openDatabase();

    try {
      await db.query(
        `CREATE TABLE IF NOT EXISTS
${metadata.table}
(
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
state VARCHAR(2),
stateName TEXT,
stateFips VARCHAR(2),
countyName TEXT,
countyFips VARCHAR(5),
timezone TEXT,
dst BOOLEAN,
shape MULTIPOLYGON NOT NULL
)`,
      );
    } finally {
      // Close the connection even when the query fails so it is not leaked.
      await db.end();
    }

    // A newly created table has no rows, so the data must be loaded.
    return true;
  },
};

const loadData = async () => {
console.log(" loading counties data");

const db = await openDatabase();
const file = await shapefile.open(`./c_05mr24.shp`);

await db.query(
`CREATE TABLE IF NOT EXISTS
${metadata.table}
(
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
state VARCHAR(2),
stateName TEXT,
stateFips VARCHAR(2),
countyName TEXT,
countyFips VARCHAR(5),
timezone TEXT,
dst BOOLEAN,
shape MULTIPOLYGON NOT NULL
)`,
);
await dropIndexIfExists(
db,
"counties_spatial_idx",
"weathergov_geo_counties",
);
await dropIndexIfExists(db, "counties_spatial_idx", metadata.table);

const shapeTzToIANA = new Map([
["V", "America/Puerto_Rico"],
Expand All @@ -47,8 +53,8 @@ module.exports = async () => {
["S", "Pacific/Pago_Pago"],
]);

await db.query("TRUNCATE TABLE weathergov_geo_counties");
await db.query("ALTER TABLE weathergov_geo_counties AUTO_INCREMENT=0");
await db.query(`TRUNCATE TABLE ${metadata.table}`);
await db.query(`ALTER TABLE ${metadata.table} AUTO_INCREMENT=0`);

const getSqlForShape = async ({ done, value }) => {
if (done) {
Expand All @@ -71,7 +77,7 @@ module.exports = async () => {
const observesDST = tz.toUpperCase() === tz;

await db.query(
`INSERT INTO weathergov_geo_counties
`INSERT INTO ${metadata.table}
(state, countyName, countyFips, timezone, dst, shape)
VALUES(
'${state}',
Expand All @@ -91,25 +97,25 @@ module.exports = async () => {
// Once we've got all the counties loaded, grab the associated full state
// names and state FIPS codes from the states table.
await db.query(
`UPDATE weathergov_geo_counties c
`UPDATE ${metadata.table} c
SET
stateName=(
SELECT name FROM weathergov_geo_states s
SELECT name FROM ${statesTable} s
WHERE
s.state=c.state
),
stateFips=(
SELECT fips FROM weathergov_geo_states s
SELECT fips FROM ${statesTable} s
WHERE
s.state=c.state
)`,
);

await db.query(
"CREATE SPATIAL INDEX counties_spatial_idx ON weathergov_geo_counties(shape)",
`CREATE SPATIAL INDEX counties_spatial_idx ON ${metadata.table}(shape)`,
);

db.end();
};

module.exports.metadata = metadata;
module.exports = { ...metadata, schemas, loadData };
45 changes: 27 additions & 18 deletions spatial-data/sources/countyWarningAreas.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,38 @@ const { dropIndexIfExists, openDatabase } = require("../lib/db.js");

const metadata = {
table: "weathergov_geo_cwas",
version: 1,
};

module.exports = async () => {
console.log("loading WFOs...");
// Schema upgrade functions for the county warning areas table, keyed by
// integer schema version. Each function returns true when the source data must
// be reloaded after the upgrade, or false when only the schema changed.
const schemas = {
  // Version 1: create the CWA table if it does not already exist.
  1: async () => {
    const db = await openDatabase();

    try {
      await db.query(`
CREATE TABLE IF NOT EXISTS
${metadata.table}
(
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
wfo VARCHAR(3),
cwa VARCHAR(3),
region VARCHAR(2),
city VARCHAR(50),
state VARCHAR(50),
st VARCHAR(2),
shape MULTIPOLYGON NOT NULL
)`);
    } finally {
      // Close the connection even when the query fails so it is not leaked.
      await db.end();
    }

    // A newly created table has no rows, so the data must be loaded.
    return true;
  },
};

const loadData = async () => {
console.log(" loading WFOs/CWAs data");
const db = await openDatabase();

const file = await shapefile.open(`./w_05mr24.shp`);

await db.query(`
CREATE TABLE IF NOT EXISTS
${metadata.table}
(
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
wfo VARCHAR(3),
cwa VARCHAR(3),
region VARCHAR(2),
city VARCHAR(50),
state VARCHAR(50),
st VARCHAR(2),
shape MULTIPOLYGON NOT NULL
)`);

await dropIndexIfExists(db, "cwas_spatial_idx", "weathergov_geo_cwas");
await db.query("TRUNCATE TABLE weathergov_geo_cwas");
await db.query("ALTER TABLE weathergov_geo_cwas AUTO_INCREMENT=0");
Expand Down Expand Up @@ -79,4 +88,4 @@ CREATE TABLE IF NOT EXISTS
db.end();
};

module.exports.metadata = metadata;
module.exports = { ...metadata, schemas, loadData };
Loading

0 comments on commit 73f632a

Please sign in to comment.