From 0f6d1727be11a6a3e6d536eb3167afda8c462115 Mon Sep 17 00:00:00 2001 From: scott haselton Date: Thu, 2 Jan 2025 12:57:09 -0800 Subject: [PATCH] supporting gzip checks (#28) * supporting gzip checks * update documentation explaining new functionality * Update README.md Co-authored-by: Mint Thompson --------- Co-authored-by: Mint Thompson --- README.md | 4 +++- src/commands.ts | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a10ef72..cc9f6f2 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,13 @@ Overriding the default error limit to show all errors and warnings: ```sh cms-hpt-validator ./sample.csv v2.0.0 -e 0 ``` +### Machine-readable File Extensions +The two current allowable file formats for the HPT MRFs are CSV and JSON. The CLI will auto-detect the file format passed into the tool for files that end with `.csv` or `.json` and will run the appropriate validator for that file. The CLI can also detect files compressed by gzip. Files ending with the `.gz` extension will be decompressed before validation. These file format detections can be combined, so files ending with `.csv.gz` or `.json.gz` will be decompressed and validated as CSV or JSON, respectively. For other files ending with `.gz`, use the `-f` option described above to specify the file format explicitly. + ## Limitations There may be a situation in which the CLI tool will run out of memory due to the amount of errors that are found in the file being validated. If you run into this NODE error, update the amount of errors to a smaller value that will be allowed to be collected with the `-e, --error-limit` flag. 
- ## Contributing Thank you for considering contributing to an Open Source project of the US diff --git a/src/commands.ts b/src/commands.ts index c400013..7c31e99 100644 --- a/src/commands.ts +++ b/src/commands.ts @@ -1,6 +1,7 @@ import fs from "fs" import path from "path" import chalk from "chalk" +import zlib from "zlib" import { CsvValidationOptions, JsonValidatorOptions, @@ -24,7 +25,13 @@ export async function validate( return } - const inputStream = fs.createReadStream(filepath, "utf-8") + const inputStream = filepath.endsWith(".gz") + ? fs + .createReadStream(filepath) + .pipe(zlib.createGunzip()) + .setEncoding("utf-8") + : fs.createReadStream(filepath, "utf-8") + const validationResult = await validateFile( inputStream, version, @@ -64,7 +71,7 @@ export async function validate( } async function validateFile( - inputStream: fs.ReadStream, + inputStream: fs.ReadStream | NodeJS.ReadableStream, version: string, format: FileFormat, validatorOptions: CsvValidationOptions | JsonValidatorOptions @@ -93,6 +100,10 @@ function getFileFormat( ): FileFormat | null { if (fileFormat.format) return fileFormat.format as FileFormat + if (filepath.endsWith(".gz")) { + filepath = filepath.slice(0, -3) + } + const fileExt = path.extname(filepath).toLowerCase().replace(".", "") if (["csv", "json"].includes(fileExt)) { return fileExt as FileFormat