Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial data management tweaks action #2064

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/actions/update-spatial-data/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: update spatial data
description: Installs the cf CLI, logs in to cloud.gov, builds the spatial container, and runs the spatial data load script.
inputs:
  environment:
    required: true
  # NOTE(review): cf-space is declared and required, but no step in this
  # action reads it. Confirm whether the login step should target the space.
  cf-space:
    required: true
  cf-username:
    required: true
  cf-password:
    required: true
runs:
  using: composite
  steps:
    - uses: actions/checkout@v4
    - uses: dcarbone/install-jq-action@v2
    - shell: bash
      # Inputs are handed to the script through environment variables rather
      # than being interpolated into the script text. Interpolating them would
      # let shell metacharacters in a credential be executed as commands.
      env:
        INPUT_CF_USERNAME: ${{ inputs.cf-username }}
        INPUT_CF_PASSWORD: ${{ inputs.cf-password }}
      run: |
        wget -O cf-cli.deb "https://packages.cloudfoundry.org/stable?release=debian64&source=github"
        sudo dpkg -i cf-cli.deb
        cf login -a api.fr.cloud.gov -u "$INPUT_CF_USERNAME" -p "$INPUT_CF_PASSWORD"
        docker compose build spatial
        docker compose pull database
    - name: update spatial data tables
      shell: bash
      env:
        INPUT_ENVIRONMENT: ${{ inputs.environment }}
      run: ./scripts/load-spatial-data.sh "$INPUT_ENVIRONMENT"
8 changes: 8 additions & 0 deletions .github/workflows/code-standards.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ jobs:
with:
fetch-depth: 0

- name: spatial update
uses: ./.github/actions/update-spatial-data
with:
environment: greg
cf-space: greg
cf-username: ${{ secrets.CF_GREG_USERNAME }}
cf-password: ${{ secrets.CF_GREG_PASSWORD }}

- name: list files that have changed from main
id: diff
run: echo "diff=$(git diff --name-only origin/main)" | tr "\n" "\t" >> $GITHUB_OUTPUT
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ services:
volumes:
- ./spatial-data/:/app
- /app/node_modules
extra_hosts:
- host.docker.internal:host-gateway

networks:
weather.gov:
5 changes: 4 additions & 1 deletion scripts/load-spatial-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ while read -r host port db username password; do
echo "setting up SSH tunnel..."
# open a tunnel
cf ssh -N -T -L "$LOCAL_PORT":"$host":"$port" "$TARGET" &
sleep 5
while ! netstat -tna | grep 'LISTEN' | grep -q ":$LOCAL_PORT"; do
echo "...port not ready"
sleep 3 # time in seconds, tune it as needed
done

# load
docker compose run --rm -T spatial node load-shapefiles.js "$username" "$password" "$db" host.docker.internal "$LOCAL_PORT"
Expand Down
10 changes: 10 additions & 0 deletions spatial-data/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
# We will fetch most of the source spatial data as-needed. The data directory
# is a local cache, but it shouldn't be persisted elsewhere. The one exception
# is the cities500 dataset, which is not controlled by a US government entity,
# so we don't want to fetch it dynamically.
data/*
!data/us.cities500.txt.zip

# This is a temporary file created when zapping a dev environment. It's used
# to more quickly restore the spatial data tables instead of having to rebuild
# them from source each time.
dump.mysql
62 changes: 29 additions & 33 deletions spatial-data/lib/meta.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,24 @@ const targets = {
zones,
};

module.exports = async () => {
const db = await openDatabase();

const initializeMetadataTable = async (db) => {
await db.query(
`CREATE TABLE IF NOT EXISTS
weathergov_geo_metadata
(
table_name varchar(512) NOT NULL PRIMARY KEY,
version SMALLINT UNSIGNED
version SMALLINT UNSIGNED DEFAULT 0
)`,
);
};

module.exports = async () => {
const db = await openDatabase();

await initializeMetadataTable(db);

// Get the existing schema and data versions for our tables and map it into
// a useful data structure.
const existing = await db
.query("SELECT * FROM weathergov_geo_metadata")
.then(([rows]) =>
Expand All @@ -42,45 +48,35 @@ module.exports = async () => {

await db.end();

const results = {};
const pendingUpdates = [];

// Iterate through all of our sources and figure out which ones need to be
// updated in which ways.
for (const [target, metadata] of Object.entries(targets)) {
const databaseVersion = +(existing[metadata.table] ?? 0);
const currentSchemaVersion = +(existing[metadata.table] ?? 0);

const currentVersion = Math.max(
const wantedSchemaVersion = Math.max(
...Object.keys(metadata?.schemas).map((v) => +v),
);

results[target] = {
update: currentVersion > databaseVersion,
from: databaseVersion,
to: currentVersion,
pendingUpdates.push({
target,
update: wantedSchemaVersion > currentSchemaVersion,
from: currentSchemaVersion,
to: wantedSchemaVersion,
metadata,
};
});
}

return results;
return pendingUpdates;
};

module.exports.update = async (target) => {
const meta = await module.exports();

const metadata = targets[target];

if (meta[target].update) {
const currentVersion = Math.max(
...Object.keys(metadata?.schemas).map((v) => +v),
);
console.log(`setting ${metadata.table} to version ${currentVersion}`);

const db = await openDatabase();

// UPSERT query, essentially
const sql = `INSERT INTO weathergov_geo_metadata
module.exports.update = async (db, table, version) => {
// UPSERT query, essentially
const sql = `INSERT INTO weathergov_geo_metadata
(table_name, version)
VALUES("${metadata.table}", "${currentVersion}")
VALUES(?,?)
ON DUPLICATE KEY
UPDATE version="${currentVersion}"`;
await db.query(sql);
await db.end();
}
UPDATE version=?`;
await db.query(sql, [table, version, version]);
};
33 changes: 19 additions & 14 deletions spatial-data/lib/prep.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
const { exec: nodeExec } = require("node:child_process");
const fs = require("node:fs/promises");
const path = require("node:path");
const chalk = require("chalk");
const { fileExists } = require("./util.js");

const DATA_PATH = "./data";

const exec = async (...args) =>
new Promise((resolve, reject) => {
nodeExec(...args, (err, stdout) => {
Expand All @@ -14,34 +18,35 @@ const exec = async (...args) =>
});

module.exports.downloadAndUnzip = async (url) => {
const filename = url.split("/").pop();
const filePath = path.join(DATA_PATH, url.split("/").pop());

if (!(await fileExists(filename))) {
console.log(`Downloading ${filename}...`);
console.log(` ${chalk.blue(`getting data for ${filePath}`)}`);
if (!(await fileExists(filePath))) {
console.log(` ${chalk.yellow(`● downloading ${filePath}...`)}`);
const data = await fetch(url)
.then((r) => r.blob())
.then((blob) => blob.arrayBuffer());
await fs.writeFile(filename, Buffer.from(data));
await fs.writeFile(filePath, Buffer.from(data));
console.log(` ${chalk.green("●")} downloaded`);
} else {
console.log(`${filename} is already present`);
console.log(` ${chalk.green("●")} ${filePath} is already present`);
}

await exec(`unzip -t ${filename}`)
await exec(`unzip -t ${filePath}`)
.then(async () => {
await module.exports.unzip(filename);

console.log(` [${filename}] done`);
await module.exports.unzip(filePath);
})
.catch(async () => {
console.log("zip file is corrupt. Trying again...");
await fs.unlink(filename);
console.log(chalk.red(" ● zip file is corrupt. Trying again..."));
await fs.unlink(filePath);
await module.exports.downloadAndUnzip(url);
});
};

module.exports.unzip = async (path) => {
console.log(` [${path}] decompressing...`);
module.exports.unzip = async (filePath, outDirectory = "./data") => {
console.log(` ${chalk.yellow(`● decompressing ${filePath}`)}`);

// Use -o to overwrite existing files.
await exec(`unzip -o -u ${path}`);
await exec(`unzip -o -u ${filePath} -d ${outDirectory}`);
console.log(` ${chalk.green("●")} ${filePath} decompressed`);
};
19 changes: 0 additions & 19 deletions spatial-data/lib/schema.js

This file was deleted.

47 changes: 47 additions & 0 deletions spatial-data/lib/update.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
const chalk = require("chalk");
const metadata = require("./meta.js");
const { openDatabase } = require("./db");

module.exports = async ({ target, from, metadata: { table, schemas } }) => {
const schemaVersions = Object.keys(schemas).filter(
(version) => +version > from,
);
const upgrades = [...Array(schemaVersions.length)].map(
// Plus one because our schema versions are 1-based, not 0-based.
(_, i) => i + from + 1,
);

console.log(`━━━━━━ Updating ${target} [${table}] ━━━━━`);

for await (const version of upgrades) {
const { schema, data } = schemas[version];

const db = await openDatabase();
try {
await db.beginTransaction();
console.log(chalk.blue(` to version ${version}`));
if (schema) {
console.log(chalk.yellow(" ● updating schema"));
await schema(db);
}
if (data) {
console.log(chalk.yellow(" ● updating data"));
await data(db);
}

console.log(chalk.yellow(" ● updating metadata"));
await metadata.update(db, table, version);
await db.commit();
console.log(
chalk.green(` ● successfully updated to version ${version}`),
);
} catch (e) {
console.log(" ERROR");
console.log(e);
await db.rollback();
break;
} finally {
await db.end();
}
}
};
43 changes: 25 additions & 18 deletions spatial-data/load-shapefiles.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
const path = require("node:path");
const chalk = require("chalk");
const { downloadAndUnzip, unzip } = require("./lib/prep.js");

const metadata = require("./lib/meta.js");
const updateSchema = require("./lib/schema.js");
const updateTable = require("./lib/update.js");

async function main() {
const meta = await metadata();
Expand All @@ -24,37 +26,42 @@ async function main() {
const urls = [];
const zips = [];

for (const [target, { update }] of Object.entries(meta)) {
console.log(`Fetching data for ${target}...`);
let hasUpdates = false;

for (const { target, update } of meta) {
if (update) {
hasUpdates = true;
if (dataUrls[target]) {
urls.push(...dataUrls[target]);
}
if (dataZips[target]) {
zips.push(...dataZips[target]);
}
} else {
console.log(` already up-to-date; skipping`);
}
}

for await (const url of urls) {
await downloadAndUnzip(url);
if (!hasUpdates) {
console.log(chalk.green("Everything is already up-to-date. Stopping!"));
// return;
}

for await (const zip of zips) {
await unzip(zip);
if (urls.length) {
console.log("━━━━━━ Downloading needed data ━━━━━");
for await (const url of urls) {
await downloadAndUnzip(url);
}
}

for await (const [source, sourceMetadata] of Object.entries(meta)) {
if (sourceMetadata.update) {
console.log(`${source} needs updating...`);
const importData = await updateSchema(sourceMetadata);
if (importData) {
console.log(` ${source} requires data loading...`);
await sourceMetadata.metadata.loadData();
}
await metadata.update(source);
if (zips.length) {
console.log("━━━━━━ Decompressing local data ━━━━━");
for await (const zip of zips) {
await unzip(path.join("./data", zip));
}
}

for await (const updateMetadata of meta) {
if (updateMetadata.update) {
await updateTable(updateMetadata);
}
}
}
Expand Down
Loading
Loading