This repository has been archived by the owner on Sep 24, 2024. It is now read-only.
forked from ezwelty/opentrees-harvester
-
Notifications
You must be signed in to change notification settings - Fork 1
/
source.js
1118 lines (1080 loc) · 37.3 KB
/
source.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* Describe a source dataset.
*
* @module
*/
const fs = require('fs')
const path = require('path')
const util = require('util')
const exec = util.promisify(require('child_process').exec)
const colors = require('colors')
const glob = require('glob')
const gdal = require('gdal-async')
const { DownloaderHelper } = require('node-downloader-helper')
const decompress = require('decompress')
const { table } = require('table')
const helpers = require('./helpers')
const { downloadWfsDataInXmlFormat } = require('./wfs_download')
const { search } = require('../archiver/archive')
const archiver = require('../archiver/archive')
/**
* Properties used by {@link Source} for data processing.
*
* @typedef {object} SourceProperties
* @property {string} id - Identifier prepended to console output.
* @property {string|string[]} download - Path to remote files to download and
* unpack.
* @property {string} wfsLayer - URL of WFS remote link to download
* @property {string} featureLayer - Path to ArcGIS Feature Server layer.
* See https://developers.arcgis.com/rest/services-reference/enterprise/query-feature-service-layer-.htm.
* @property {string|string[]} execute - Shell commands executed from working
* directory (`Source.dir`) after file download and unpack. In `npm run`
* commands, prepend the `INIT_CWD` variable to paths to the files
* (https://docs.npmjs.com/cli/run-script).
* @property {string} filename - Glob pattern (relative to working directory)
* used to find the file to read. Only needed when there are multiple files and
* either none or multiple have extensions recognized by GDAL.
* @property {string} srs - Spatial reference system in any format supported by
* [OGRSpatialReference.SetFromUserInput()](https://gdal.org/api/ogrspatialref.html#classOGRSpatialReference_1aec3c6a49533fe457ddc763d699ff8796).
* @property {object} geometry - Geometry field names for formats without
* explicit geometries (e.g. tabular text files like CSV). If not provided, will
* attempt to guess from field names.
* @property {string} geometry.wkt - Name of field with well-known-text (wkt)
* geometry. If provided, takes precedence over x, y.
* @property {string} geometry.x - Name of field with x coordinate (longitude,
* easting).
* @property {string} geometry.y - Name of field with y coordinate (latitude,
* northing).
* @property {Object.<string, string|function>} crosswalk - Crosswalk mapping to
* a target schema. For each `key: value` pair, `key` is the new field name and
* `value` is either the old field name (e.g. `height: 'HEIGHT'`) or a function
* that takes an object (of feature field values) and returns a value (e.g.
* `height: obj => obj.HEIGHT / 100`).
* @property {function} delFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a value (e.g. `obj =>
* obj.HEALTH === 'dead'`). The feature is excluded from the output if the
* returned value evaluates to `true`.
* @property {function} coordsFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a number array of point
* coordinates `[x, y]`. This is a useful alternative to `geometry` if the
* coordinates need to be extracted from field values (e.g. `obj =>
* obj.XY.split(';').map(Number)`).
*/
/**
* Additional properties not used by {@link Source} but used downstream.
*
* @typedef {SourceProperties} SourcePropertiesExtended
* @property {string} pending - Pending issues preventing processing.
* @property {string} primary - `id` of the primary source (for grouping sources
* together).
* @property {string} long - Full name of the government body, university, or
* other institution (e.g. `City of Melbourne`).
* @property {string} short - Short name (e.g. `Melbourne`).
* @property {string} country - Country name in English (e.g. `Australia`).
* @property {object} centre - Centre point (for map label placement).
* @property {number} centre.lon - Longitude in decimal degrees (EPSG:4326).
* @property {number} centre.lat - Latitude in decimal degrees (EPSG:4326).
* @property {string} info - Path to page with more information.
* @property {string} language - Language of contents as an [ISO
* 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code (e.g. `en`) and an
* optional [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)
* region code (e.g. `en-AU`).
* @property {object} license - Data license.
* @property {string} license.id - License identifier from the Software Package
* Data Exchange (SPDX) [license list](https://spdx.org/licenses/) (e.g.
* `CC-BY-4.0`).
* @property {string} license.name - License name (e.g. `Creative Commons
* Attribution 4.0 International`).
* @property {string} license.url - Path to page with license text (e.g.
* `https://creativecommons.org/licenses/by/4.0`).
*/
/**
* Class representing a source dataset.
*
* @param {SourceProperties} props - Source properties.
* @param {string} dir - Local directory to which remote files are downloaded
* and where local files are searched for.
* @param {object} [options]
* @param {boolean} [options.exit=true] - Whether to throw errors or print them
* to the console.
* @param {string} [options.srs=EPSG:4326] - Spatial reference system to
* assume if none is defined in `props.srs` and none can be read from the input
* files.
*/
class Source {
constructor(props, dir, options = {}) {
this.props = props
this.dir = dir
this.options = {
...{
exit: true,
srs: 'EPSG:4326'
},
...options
}
// Validate
this.validate(true)
// Cache
this.__dataset = null
this.__vrt = null
}
/**
* Validate source properties.
*
* @param {boolean} [error=false] - Whether to raise errors.
* @return {Array.<Array<string, *>>} Errors in the format [message, value].
* @private
*/
validate(error = false) {
  const errors = []
  const props = this.props
  // True for a string, or for a non-empty array in which every item is a string
  const isStringOrStrings = value => typeof value === 'string' ||
    (Array.isArray(value) && value.length > 0 &&
      value.every(item => typeof item === 'string'))
  // id (the only required property)
  if (props.id) {
    if (typeof props.id !== 'string') {
      errors.push(['Invalid id', props.id])
    }
  } else {
    errors.push(['Missing id'])
  }
  // download
  if (props.download && !isStringOrStrings(props.download)) {
    errors.push(['Invalid download', props.download])
  }
  // wfsLayer
  // NOTE(review): checked as a string because the SourceProperties typedef
  // documents it as one — confirm against downloadWfsDataInXmlFormat
  if (props.wfsLayer && typeof props.wfsLayer !== 'string') {
    errors.push(['Invalid wfsLayer', props.wfsLayer])
  }
  // featureLayer
  if (props.featureLayer && typeof props.featureLayer !== 'string') {
    errors.push(['Invalid featureLayer', props.featureLayer])
  }
  // execute
  if (props.execute && !isStringOrStrings(props.execute)) {
    errors.push(['Invalid execute', props.execute])
  }
  // filename
  if (props.filename && typeof props.filename !== 'string') {
    errors.push(['Invalid filename', props.filename])
  }
  // crosswalk: each value is either a source field name or a function
  if (props.crosswalk) {
    Object.keys(props.crosswalk).forEach(key => {
      const value = props.crosswalk[key]
      if (!['string', 'function'].includes(typeof value)) {
        errors.push([`Invalid type for crosswalk.${key}`, typeof value])
      }
    })
  }
  // delFunc, coordsFunc: both are called as functions by process()
  for (const key of ['delFunc', 'coordsFunc']) {
    if (props[key] && typeof props[key] !== 'function') {
      errors.push([`Invalid ${key}`, props[key]])
    }
  }
  // geometry: needs either a wkt field name or both x and y field names
  if (props.geometry) {
    if (!(typeof (props.geometry) === 'object' &&
      (typeof (props.geometry.wkt) === 'string' ||
        (typeof (props.geometry.x) === 'string' &&
          typeof (props.geometry.y) === 'string')))) {
      errors.push(['Invalid geometry', props.geometry])
    }
  }
  // srs: valid if GDAL can parse it
  if (props.srs) {
    try {
      gdal.SpatialReference.fromUserInput(props.srs)
    } catch (err) {
      errors.push(['Invalid srs', props.srs])
    }
  }
  if (error && errors.length) {
    this.error('Validation failed:', errors)
  }
  // Always hand the list back, including when this.error returned instead of
  // throwing, so the documented return value holds on every path
  return errors
}
/**
* Prepare remote source data for processing.
*
* Downloads remote data (`this.props.download`, `this.props.wfsLayer`,
* `this.props.featureLayer`),
* unpacks compressed or archive files,
* and executes shell commands (`this.props.execute`).
*
* @param {boolean} [overwrite=false] - Whether to proceed if working
* directory is not empty (see {@link Source#isEmpty}).
* @return {Promise<string[]>} Resolves to the paths of the downloaded and
* unpacked local files (if any).
*/
async get(overwrite = false) {
if (
!(this.props.download || this.props.featureLayer) ||
(!overwrite && !this.isEmpty())
) {
return []
}
try {
const paths = []
const urlToPathMap = new Map()
var urls = this.props.download;
if (typeof urls == 'string') {
urls = [urls]
}
for (var url of urls) {
const results_in_archiver = search({url: url}, limit=null, minAge=24)
if (results_in_archiver > 0) {
this.error("We have already downloaded the data at the url: " + url + " in the past 24 hours. Aborting.")
}
const file_path = await this.getFile(url)
paths.push(file_path)
urlToPathMap.set(url, file_path)
}
if (this.props.wfsLayer) {
await downloadWfsDataInXmlFormat(this.props.wfsLayer, this.dir)
const output = path.join(this.dir, 'merged.xml')
paths.push(output)
urlToPathMap.set(this.props.wfsLayer, output)
}
if (this.props.featureLayer) {
const layer = await helpers.getLayerFeatures(this.props.featureLayer)
const output = path.join(this.dir, `${layer.name}.geojsonl`)
helpers.writeJSONL(layer.features, output)
this.success(`Fetched ${this.props.featureLayer} as '${output}'`)
paths.push(output)
urlToPathMap.set(this.props.featureLayer, output)
}
for (const [key, value] of urlToPathMap) {
archiver.log({
url: key,
path: value,
date: new Date(),
type: 'data'
})
}
await this.execute()
this.success('Ready to process')
return paths
} catch (error) {
throw error
}
}
/**
* Process input and write to output.
*
* Reading, writing, and coordinate transformations are performed by
* [GDAL](https://gdal.org) via the
* [node-gdal-next](https://www.npmjs.com/package/gdal-next) bindings.
*
* Processing steps include a schema crosswalk (`this.props.crosswalk`),
* skipping features by field values (`this.props.delFunc`), reducing complex
* geometries to centroid points (`options.centroids`), and skipping features
* outside a bounding box (`options.bounds`). For files without explicit
* geometries, a temporary [VRT](https://gdal.org/drivers/vector/vrt.html)
* file is created (see {@link Source#getVrt}).
*
* @param {string} file - Output file path.
* @param {object} [options] - Output options.
* @param {string} [options.driver] - Name of GDAL driver to use to write to
* the output file (see https://gdal.org/drivers/vector). Guessed from file
* extension if not provided.
* @param {string[]|object} [options.creation] - Driver-specific dataset
* creation options (see https://gdal.org/drivers/vector). Only default, for
* 'CSV', is `['GEOMETRY=AS_WKT']` to include feature geometry in output.
* @param {boolean} [options.overwrite=false] - Whether to proceed if `file`
* already exists.
* @param {string} [options.srs=+init=epsg:4326] - Output spatial reference
* system in any format supported by
* [OGRSpatialReference.SetFromUserInput()](https://gdal.org/api/ogrspatialref.html#classOGRSpatialReference_1aec3c6a49533fe457ddc763d699ff8796).
* Use 'EPSG:*' for (latitude, longitude) and '+init=epsg:*' (PROJ<6 behavior)
* for (longitude, latitude). If it is the same as the input SRS, axis order
* will remain unchanged regardless.
* @param {boolean} [options.centroids=false] - Whether to reduce non-point
* geometries to centroids.
* @param {boolean} [options.keepInvalid=false] - Whether to keep features
* with empty or invalid geometries.
* @param {boolean} [options.keepFields=false] - Whether to keep the input
* feature fields alongside the result of the schema crosswalk
* (`this.props.crosswalk`).
* @param {boolean} [options.keepGeometryFields=false] - Whether to keep the
* input feature geometry fields. Applies only to inputs for which a VRT file
* is written (see {@link Source#getVrt}) and if `options.keepFields` is
* also `true`.
* @param {string} [options.prefix=] - String to prepend to input field names
* to prevent collisions with output field names. Applies only if
* `options.keepFields` is `true`.
* @param {number[]} [options.bounds] - Bounding box in output SRS
* (`options.srs`) in the format [xmin, ymin, xmax, ymax]. If provided,
* features outside the bounds are skipped.
* @param {function} [options.delFunc] - Function that takes an object (of
* feature field values after the crosswalk) and returns a value (e.g. `obj =>
* obj.description === 'vacant site'`). The feature is excluded from the
* output if the returned value evaluates to `true`.
* @param {boolean} [options.allowEmptyGeometry=false] - Whether to allow
* feature layer with empty geometry.
* @return {boolean} Whether processed file (true) or skipped (false).
*/
process(file, options = {}) {
if (!options.overwrite && fs.existsSync(file)) {
return false
}
options = {
...{
driver: null,
creation: null,
overwrite: false,
srs: '+init=epsg:4326',
centroids: false,
keepInvalid: false,
keepFields: false,
keepGeometryFields: false,
prefix: '',
bounds: null,
delFunc: null,
allowEmptyGeometry: false
},
...options
}
if (!options.driver) {
const extension = helpers.getFileExtension(file.toLowerCase())
const drivers = helpers.getGdalDrivers()[extension]
if (drivers && drivers.length == 1) {
options.driver = drivers[0]
} else {
this.error(`Failed to guess driver for *.${extension}:`, drivers)
}
} else {
options.driver = options.driver.toLowerCase()
}
if (!options.creation) {
options.creation = ({
csv: { GEOMETRY: 'AS_WKT' }
})[options.driver]
}
options.srs = gdal.SpatialReference.fromUserInput(options.srs)
// Read input
let input = this.open()
this.log(`Processing ${input.description}`)
if (input.layers.count() > 1) {
this.warn(`Using first of ${input.layers.count()} layers`)
}
let inputLayer = input.layers.get(0)
if (!inputLayer.features.count()) {
this.warn('Skipping: Layer has no features')
return
}
let emptyGeometry = false
if (!inputLayer.features.first().getGeometry() && !this.props.coordsFunc) {
if (options.allowEmptyGeometry) {
emptyGeometry = true
} else {
// Write (and then read) VRT file with geometry definition
this.log('Writing and reading VRT file')
input = this.openVrt(options.keepGeometryFields)
inputLayer = input.layers.get(0)
}
}
// Prepare input schema
let inputSchema = inputLayer.fields.map(field => field)
/*
* NOTE: Confusing gdal bindings handling of date/time fields
* - Fields detected as date/time are read as objects, not strings
* - Cannot yet set date/time field with date/time object, only strings
* (see https://github.com/naturalatlas/node-gdal/issues/144)
* HACK:
* - Set output date/time fields as string
* - Convert input date/time fields to string
*/
const stringCrosswalk = {}
inputSchema = inputSchema.map(field => {
const formatter = helpers.gdalStringFormatters[field.type]
if (formatter) {
stringCrosswalk[field.name] = x => formatter(x[field.name])
field.type = gdal.OFTString
}
return field
})
// Prepare crosswalks
const crosswalks = [
{
crosswalk: stringCrosswalk,
keep: true,
prefix: ''
},
{
crosswalk: this.props.crosswalk,
keep: options.keepFields,
prefix: options.prefix
}
]
// Prepare output schema
const outputSchema = []
if (this.props.crosswalk) {
for (const key in this.props.crosswalk) {
// NOTE: Output as string to preserve malformed values
outputSchema.push(new gdal.FieldDefn(key, gdal.OFTString))
}
}
if (options.keepFields) {
inputSchema.forEach(field => {
field.name = `${options.prefix}${field.name}`
outputSchema.push(field)
})
}
// Prepare output
const driver = gdal.drivers.get(options.driver)
if (!driver) {
this.error('Unrecognized GDAL driver:', options.driver)
}
fs.mkdirSync(path.dirname(file), { recursive: true })
const output = driver.create(file, 0, 0, 0, gdal.GDT_Byte, options.creation)
let outputType
if (options.centroids || inputLayer.geomType == gdal.wkbNone) {
outputType = gdal.wkbPoint
} else {
outputType = inputLayer.geomType
}
const outputLayer = output.layers.create(inputLayer.name, options.srs,
outputType)
outputLayer.fields.add(outputSchema)
let transform
if (!emptyGeometry) {
const srs = this.getSrs(inputLayer)
transform = helpers.getTransform(srs, options.srs)
if (options.bounds) {
if (transform && !helpers.isAxesXY(options.srs)) {
// Swap x, y
options.bounds = [
options.bounds[1], options.bounds[0],
options.bounds[3], options.bounds[2]
]
}
options.bounds = helpers.boundsToPolygon(options.bounds)
}
}
// Populate output
const counts = {
invalidParsedCoordinates: 0,
invalidGeometries: 0,
emptyGeometries: 0,
outOfBoundGeometries: 0
}
let inputFeature
for (
inputFeature = inputLayer.features.first();
inputFeature;
inputFeature = inputLayer.features.next()) {
// Fields
const inputFields = inputFeature.fields.toObject()
if (this.props.delFunc && this.props.delFunc(inputFields)) {
continue
}
const outputFeature = new gdal.Feature(outputLayer)
const outputFields = helpers.mapObject(inputFields, crosswalks)
if (options.delFunc && options.delFunc(outputFields)) {
continue
}
outputFeature.fields.set(outputFields)
// Geometry
if (!emptyGeometry) {
let inputGeometry
if (this.props.coordsFunc) {
const coords = this.props.coordsFunc(inputFields)
if (Array.isArray(coords) && coords.length == 2) {
inputGeometry = new gdal.Point(coords[0], coords[1])
inputGeometry.srs = srs
} else {
counts.invalidParsedCoordinates++
// this.warn(`Invalid parsed coordinates at ${inputFeature.fid}:`, coords)
if (!options.keepInvalid) continue
}
} else {
inputGeometry = inputFeature.getGeometry()
}
if (inputGeometry) {
if (options.centroids && inputGeometry.wkbType != gdal.wkbPoint) {
inputGeometry = inputGeometry.centroid()
}
let isValid = true
let isPoint = inputGeometry.wkbType == gdal.wkbPoint
if (transform) {
try {
inputGeometry.transform(transform)
} catch (error) {
isValid = false
}
} else {
isValid = inputGeometry.isValid()
if (isPoint) {
isValid = isValid && inputGeometry.x && inputGeometry.y &&
isFinite(inputGeometry.x) && isFinite(inputGeometry.y)
}
}
if (!isValid) {
counts.invalidGeometries++
// const msg = `Invalid ${inputGeometry.name} at ${inputFeature.fid}`
// if (isPoint) {
// this.warn(msg, (({ x, y }) => ({ x, y }))(inputGeometry))
// } else {
// this.warn(msg)
// }
if (!options.keepInvalid) continue
}
if (options.bounds && isValid) {
if (!inputGeometry.within(options.bounds)) {
counts.outOfBoundGeometries++
// this.warn(`Out of bounds ${inputGeometry.name} at ${inputFeature.fid}`)
continue
}
}
outputFeature.setGeometry(inputGeometry)
} else {
counts.emptyGeometries++
// this.warn(`Empty geometry at ${inputFeature.fid}`)
if (!options.keepInvalid) continue
}
}
// TODO: flush after each n features
outputLayer.features.add(outputFeature)
}
// Print warnings
const nonZeroCounts = Object.fromEntries(
Object.entries(counts).filter(([_, v]) => v > 0)
)
if (Object.keys(nonZeroCounts).length) {
this.warn('Warnings (by number of features):', nonZeroCounts)
}
// Write
output.close()
this.success('Wrote output:', file)
return true
}
/**
* Get layer field names and GDAL data types.
* @return {object} Field names (keys) and GDAL data types (values)
*/
getFields() {
const layer = this.open().layers.get(0)
const fields = {}
layer.fields.forEach(field => {
fields[field.name] = field.type
})
return fields
}
/**
* Get feature fields.
* @param {integer} [n=Infinity] - Maximum number of features to read.
* @return {object[]}
*/
getRows(n = Infinity) {
const rows = []
const layer = this.open().layers.get(0)
let f
let i = 0
for (f = layer.features.first(); f && i < n; f = layer.features.next()) {
rows.push(f.fields.toObject())
i++
}
return rows
}
/**
* Sample field values from input.
*
* @param {object} [options]
* @param {number} [options.n=1000] - Maximum number of features to sample.
* @param {number} [options.max=100] - Maximum number of values to collect for each
* field.
* @param {boolean} [options.sort=true] - Whether to sort values.
* @param {boolean} [options.unique=true] - Whether to only save unique values.
* @return {object.<string, Array>} Object of field values with field names as keys.
*/
sample(options = {}) {
options = {
n: 1000,
max: 100,
sort: true,
unique: true,
...options
}
const types = this.getFields()
const values = {}
for (const key in types) {
values[key] = options.unique ? new Set() : []
}
const layer = this.open().layers.get(0)
let f
let i = 1
for (f = layer.features.first();
f && i <= options.n; f = layer.features.next()) {
for (let [key, value] of Object.entries(f.fields.toObject())) {
const formatter = helpers.gdalStringFormatters[types[key]]
value = formatter ? formatter(value) : value
if ((options.unique ? values[key].size : values[key].length) < options.max) {
if (options.unique) {
values[key].add(value)
} else {
values[key].push(value)
}
}
}
i++
}
// Convert sets to arrays
for (const key in values) {
if (options.unique) {
values[key] = [...values[key]]
}
if (options.sort) {
values[key].sort()
}
}
return values
}
/**
* Print table of input field names, types, and unique values.
*
* @param {object} [options] - Options to pass to {@link Source#sample}, plus:
* @param {object.<string, Array>} [options.sample] - Result of
* {@link Source#sample}.
* @param {number} [options.truncate=1280] - Maximum number of characters to
* print per field.
* @param {number[]} [options.widths=[20, 10, 130]] - Column widths for field
* names, types, and unique values, respectively.
* @param {string} [options.sep= · ] - Separator between unique values.
*/
glimpse(options = {}) {
options = {
truncate: 1280,
widths: [20, 10, 130],
sep: ' · ',
...options
}
if (!options.sample) {
options.sample = this.sample(options)
}
const tableOptions = {
columnDefault: {
wrapWord: true,
truncate: options.truncate
},
columns: {
0: { width: options.widths[0] },
1: { width: options.widths[1] },
2: { width: options.widths[2] }
}
}
const types = this.getFields()
// Print
const data = [
['name'.bold, 'type'.bold, 'values'.bold],
...Object.keys(options.sample).map(key =>
[key, types[key], options.sample[key].join(options.sep)])
]
console.log(table(data, tableOptions))
}
/**
* Empty and remove the source directory.
*/
empty() {
fs.rmdirSync(this.dir, { recursive: true })
}
/**
* Check whether the source directory is missing or empty of files.
*
* Checks any child directories recursively and ignores dotfiles (.*).
*
* @return {boolean} Whether source directory is empty.
*/
isEmpty() {
const files = glob.sync('**/*',
{ nocase: true, nodir: true, dot: false, cwd: this.dir })
return files.length == 0
}
/**
* Download a remote file to the source directory.
*
* @param {string} url - Path to the remote file.
* @return {Promise<string>} Resolves to the path of the downloaded file.
* @private
*/
async downloadFile(url) {
const options = { override: true, retry: { maxRetries: 1, delay: 3000 } }
const listeners = {
download: info => this.log(`Downloading ${info.fileName}`),
end: info => this.success(
`Downloaded ${info.fileName} (${(info.downloadedSize / 1e6).toFixed()} MB)`
),
error: error => this.error(`Download failed for ${url}: ${error.message}`)
}
try {
return await helpers.downloadFile(url, this.dir, options, listeners)
} catch (error) {
throw error
}
}
/**
* Unpack a compressed or archive local file to the source directory.
*
* Currently supports zip, tar, tar.bz2, and tar.gz via
* [decompress](https://www.npmjs.com/package/decompress). Support can be
* added for bz2 and gz by adding the corresponding
* [plugins](https://www.npmjs.com/search?q=keywords:decompressplugin) to the
* dependencies.
*
* @param {string} file - Path to the local file.
* @param {boolean} [rm=true] - Whether to remove the original file if
* unpacked successfully.
* @return {Promise<string[]>} Resolves to the paths of the unpacked files (if
* any) or the path of the original file.
* @private
*/
async unpackFile(file, rm = true) {
const filename = path.relative(this.dir, file)
try {
const files = await decompress(file, this.dir)
if (files.length) {
this.success(`Unpacked ${filename}:`, files.map(file => file.path))
if (rm) fs.unlinkSync(file)
return files.map(file => path.join(this.dir, file.path))
} else {
return [file]
}
} catch (error) {
this.error(`Unpack failed for ${filename}:`, error.message)
}
}
/**
* Download and unpack a remote file to the source directory.
*
* @param {string} url - Path to the remote file.
* @return {Promise<string[]>} Resolves to the paths of the unpacked files (if
* any) or the local path of the downloaded file.
* @private
*/
async getFile(url) {
try {
const file = await this.downloadFile(url)
return await this.unpackFile(file)
} catch (error) {
throw error
}
}
/**
* Download and unpack remote files to the source directory.
*
* Downloads all file paths in `this.props.download` and unpacks any
* compressed or archive files.
*
* @return {Promise<string[]>} Resolves to the paths of the downloaded and
* unpacked local files.
* @private
*/
async getFiles() {
let urls = this.props.download
if (typeof urls === 'string') {
urls = [urls]
}
try {
const paths = await Promise.all(urls.map(url => this.getFile(url)))
return paths.flat()
} catch (error) {
throw error
}
}
/**
* Execute shell commands from the source directory.
*
* Executes all shell commands in `this.props.execute` from the source
* directory (`this.dir`).
*
* @return {Promise}
* @private
*/
async execute() {
if (this.props.execute) {
const cmd = typeof this.props.execute === 'string' ?
this.props.execute : this.props.execute.join(' && ')
this.log('Executing:', this.props.execute)
try {
exec(`cd '${this.dir}' && ${cmd}`)
} catch (error) {
this.error('Execution failed:', error.message)
}
}
}
/**
* Find path to input file.
*
* Searches for all non-dotfiles in the source directory recursively and
* attempts to guess which file to pass to GDAL based on file extensions.
* Throws an error if no file is found or if multiple candidate files are
* found.
*
* @return {string} File path.
*/
find() {
let paths
if (this.props.filename) {
paths = glob.sync(this.props.filename, { nodir: true, cwd: this.dir })
} else {
paths = glob.sync(`**/*`,
{ nocase: true, nodir: true, dot: false, cwd: this.dir })
}
if (!this.props.filename) {
if (paths.length) {
const primaries = paths.filter(s =>
s.match(helpers.gdalFilePatterns.primary))
const secondaries = paths.filter(s =>
s.match(helpers.gdalFilePatterns.secondary))
if (primaries.length) {
paths = primaries
} else if (secondaries.length) {
paths = secondaries
} else {
this.warn('Found files with exotic or missing extensions:', paths)
}
}
}
if (paths.length) {
if (paths.length == 1) {
return path.join(this.dir, paths[0])
} else {
this.error(`Found ${paths.length} possible inputs:`, paths)
}
} else {
this.error('No inputs found')
}
}
/**
* Open input file with GDAL.
*
* @return {gdal.Dataset} See the documentation for
* [node-gdal-next](https://contra.io/node-gdal-next/classes/gdal.Dataset.html).
* Result is cached until closed with {@link Source#close}.
*/
open() {
// Clear if already destroyed
try {
// Choice of property is arbitrary
this.__dataset.description
} catch {
this.__dataset = null
}
if (!this.__dataset) {
this.__dataset = gdal.open(this.find())
}
return this.__dataset
}
/**
* Close input file if open with GDAL.
*/
close() {
try {
this.__dataset.close()
} catch {
} finally {
this.__dataset = null
}
}
/**
* Open input file with GDAL via a VRT file.
*
* Opens the input file via a virtual format (VRT) file written to the dotfile
* `.vrt`. The contents of the file is built by {@link Source#getVrt}.
*
* @param {boolean} [keepGeometryFields=false] - Whether the VRT file should
* return geometry fields as regular feature fields.
* @return {gdal.Dataset} See the documentation for
* [node-gdal-next](https://contra.io/node-gdal-next/classes/gdal.Dataset.html).
* The result is cached until closed with {@link Source#closeVrt}.
*/
openVrt(keepGeometryFields = false) {
// Clear if already destroyed
try {
// Choice of property is arbitrary
this.__vrt.description
} catch {
this.__vrt = null
}
if (!this.__vrt) {
// HACK: Writes to local dotfile to hide from find()
const vrtPath = path.join(this.dir, '.vrt')
fs.writeFileSync(vrtPath, this.getVrt(keepGeometryFields))
this.__vrt = gdal.open(vrtPath)
}
return this.__vrt
}
/**
* Close input file if open with GDAL via a VRT file.
*/
closeVrt() {
try {
this.__vrt.close()
} catch {
} finally {
this.__vrt = null
}
}
/**
* Get spatial reference system (SRS) of input as a string.
*
* @param {gdal.Layer} [layer] - Feature layer from which to read SRS. If not
* provided, defaults to the first layer of the input file (see
* {@link Source#open}).
* @return {string} Either the provided SRS (`this.props.srs`), the SRS read
* from the input file (as well-known-text), or the default SRS
* (`this.options.srs`).
*/
getSrsString(layer) {
let srs = this.props.srs
if (!srs) {
if (!layer) {
layer = this.open().layers.get(0)
}
if (layer.srs) {
srs = layer.srs.toWKT()
}
}
if (!srs) {
srs = this.options.srs
this.warn('Assuming default SRS:', srs)
}
return srs
}
/**
* Get spatial reference system (SRS) of input.
*
* @param {gdal.Layer} [layer] - Feature layer from which to read SRS. If not
* provided, defaults to the first layer of the input file (see
* {@link Source#open}).
* @return {gdal.SpatialReference} SRS object initialized by
* `gdal.SpatialReference.fromUserInput()` from the result of
* {@link Source#getSrsString}. See the documentation for
* [node-gdal-next](https://contra.io/node-gdal-next/classes/gdal.SpatialReference.html#method-fromUserInput).
*/
getSrs(layer) {
const srs = this.getSrsString(layer)
return gdal.SpatialReference.fromUserInput(srs)
}
/**
* Get geometry field name(s) of input.
*
* @return {{?wkt: string, ?x: string, ?y: string}|undefined} Names of
* geometry fields either provided (`this.props.geometry`) or guessed from field
* names, or `undefined` if the input already has explicit geometries.
*/
getGeometry() {
let geometry = this.props.geometry
if (!geometry) {
geometry = {}
const layer = this.open().layers.get(0)
if (layer.geomType != gdal.wkbNone) {