Skip to content

Commit

Permalink
feat: ascii file read updates to address #112 (#113)
Browse files Browse the repository at this point in the history
feat: added delimiter option and datetime parsing in spatial class
  • Loading branch information
tsutterley authored Oct 3, 2022
1 parent b7d813b commit d0a18f3
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 95 deletions.
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_LPET_elevations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Calling Sequence
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_LPT_displacements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ compute_LPT_displacements.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_OPT_displacements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ compute_OPT_displacements.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_tidal_currents.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ compute_tidal_currents.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_tidal_elevations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ compute_tidal_elevations.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
47 changes: 34 additions & 13 deletions pyTMD/spatial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
spatial.py
Written by Tyler Sutterley (06/2022)
Written by Tyler Sutterley (10/2022)
Utilities for reading, writing and operating on spatial data
Expand All @@ -19,6 +19,7 @@
https://github.com/yaml/pyyaml
UPDATE HISTORY:
Updated 10/2022: added datetime parser for ascii time columns
Updated 06/2022: added field_mapping options to netCDF4 and HDF5 reads
added from_file wrapper function to read from particular formats
Updated 04/2022: add option to reduce input GDAL raster datasets
Expand Down Expand Up @@ -56,6 +57,7 @@
import datetime
import warnings
import numpy as np
import dateutil.parser
# attempt imports
try:
import osgeo.gdal, osgeo.osr, osgeo.gdalconst
Expand Down Expand Up @@ -165,13 +167,19 @@ def from_ascii(filename, **kwargs):
file compression type
columns: list, default ['time','y','x','data']
column names of ascii file
delimiter: str, default ','
    delimiter for csv or ascii files
header: int, default 0
header lines to skip from start of file
parse_dates: bool, default False
Try parsing the time column
"""
# set default keyword arguments
kwargs.setdefault('compression',None)
kwargs.setdefault('columns',['time','y','x','data'])
kwargs.setdefault('delimiter',',')
kwargs.setdefault('header',0)
kwargs.setdefault('parse_dates',False)
# print filename
logging.info(filename)
# get column names
Expand Down Expand Up @@ -218,30 +226,43 @@ def from_ascii(filename, **kwargs):
dinput['attributes'] = YAML_HEADER['header']['global_attributes']
# allocate for each variable and copy variable attributes
for c in columns:
dinput[c] = np.zeros((file_lines-count))
if (c == 'time') and kwargs['parse_dates']:
dinput[c] = np.zeros((file_lines-count),dtype='datetime64[ms]')
else:
dinput[c] = np.zeros((file_lines-count))
dinput['attributes'][c] = YAML_HEADER['header']['variables'][c]
# update number of file lines to skip for reading data
header = int(count)
else:
# output spatial data and attributes
dinput = {c:np.zeros((file_lines-kwargs['header'])) for c in columns}
dinput['attributes'] = {c:dict() for c in columns}
# allocate for each variable and variable attributes
dinput = {}
header = int(kwargs['header'])
for c in columns:
if (c == 'time') and kwargs['parse_dates']:
dinput[c] = np.zeros((file_lines-header),dtype='datetime64[ms]')
else:
dinput[c] = np.zeros((file_lines-header))
dinput['attributes'] = {c:dict() for c in columns}
# extract spatial data array
# for each line in the file
for i,line in enumerate(file_contents[header:]):
# extract columns of interest and assign to dict
# convert fortran exponentials if applicable
column = {c:r.replace('D','E') for c,r in zip(columns,rx.findall(line))}
if kwargs['delimiter']:
column = {c:l.replace('D','E') for c,l in zip(columns,line.split(kwargs['delimiter']))}
else:
column = {c:r.replace('D','E') for c,r in zip(columns,rx.findall(line))}
# copy variables from column dict to output dictionary
for c in columns:
dinput[c][i] = np.float64(column[c])
if (c == 'time') and kwargs['parse_dates']:
dinput[c][i] = dateutil.parser.parse(column[c])
else:
dinput[c][i] = np.float64(column[c])
# convert to masked array if fill values
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].mask = np.zeros_like(dinput['data'],dtype=bool)
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask[:] = (dinput['data'].data == dinput['data'].fill_value)
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
# return the spatial variables
return dinput

Expand Down Expand Up @@ -340,7 +361,7 @@ def from_netCDF4(filename, **kwargs):
srs.ImportFromWkt(dinput['attributes']['crs']['crs_wkt'])
dinput['attributes']['projection'] = srs.ExportToProj4()
# convert to masked array if fill values
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
Expand Down Expand Up @@ -445,7 +466,7 @@ def from_HDF5(filename, **kwargs):
srs.ImportFromWkt(dinput['attributes']['crs']['crs_wkt'])
dinput['attributes']['projection'] = srs.ExportToProj4()
# convert to masked array if fill values
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
Expand Down
62 changes: 46 additions & 16 deletions scripts/compute_LPET_elevations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
compute_LPET_elevations.py
Written by Tyler Sutterley (04/2022)
Written by Tyler Sutterley (10/2022)
Calculates long-period equilibrium tidal elevations for an input file
INPUTS:
Expand All @@ -20,18 +20,20 @@
for csv files: the order of the columns within the file
for HDF5 and netCDF4 files: time, y, x and data variable names
-H X, --header X: number of header lines for csv files
--delimiter X: delimiter for csv or ascii files
-t X, --type X: input data type
drift: drift buoys or satellite/airborne altimetry (time per data point)
grid: spatial grids or images (single time for all data points)
-e X, --epoch X: Reference epoch of input time (default Modified Julian Day)
days since 1858-11-17T00:00:00
-d X, --deltatime X: Input delta time for files without date information
can be set to 0 to use exact calendar date from epoch
-s X, --standard X: Input time standard for delta times
-s X, --standard X: Input time standard for delta times or input time type
UTC: Coordinate Universal Time
GPS: GPS Time
LORAN: Long Range Navigator Time
TAI: International Atomic Time
datetime: formatted datetime string in UTC
-P X, --projection X: spatial projection as EPSG code or PROJ4 string
4326: latitude and longitude coordinates on WGS84 reference ellipsoid
-V, --verbose: Verbose output of processing run
Expand Down Expand Up @@ -62,6 +64,7 @@
compute_equilibrium_tide.py: calculates long-period equilibrium ocean tides
UPDATE HISTORY:
Updated 10/2022: added delimiter option and datetime parsing for ascii files
Updated 04/2022: use argparse descriptions within documentation
Updated 01/2022: added option for changing the time standard
Updated 11/2021: add function for attempting to extract projection
Expand Down Expand Up @@ -114,9 +117,17 @@ def get_projection(attributes, PROJECTION):
# PURPOSE: read csv, netCDF or HDF5 data
# compute long-period equilibrium tides at points and times
def compute_LPET_elevations(input_file, output_file,
FORMAT='csv', VARIABLES=['time','lat','lon','data'], HEADER=0, TYPE='drift',
TIME_UNITS='days since 1858-11-17T00:00:00', TIME_STANDARD='UTC',
TIME=None, PROJECTION='4326', VERBOSE=False, MODE=0o775):
FORMAT='csv',
VARIABLES=['time','lat','lon','data'],
HEADER=0,
DELIMITER=',',
TYPE='drift',
TIME_UNITS='days since 1858-11-17T00:00:00',
TIME_STANDARD='UTC',
TIME=None,
PROJECTION='4326',
VERBOSE=False,
MODE=0o775):

# create logger for verbosity level
loglevel = logging.INFO if VERBOSE else logging.CRITICAL
Expand Down Expand Up @@ -149,8 +160,9 @@ def compute_LPET_elevations(input_file, output_file,

# read input file to extract time, spatial coordinates and data
if (FORMAT == 'csv'):
parse_dates = (TIME_STANDARD.lower() == 'datetime')
dinput = pyTMD.spatial.from_ascii(input_file, columns=VARIABLES,
header=HEADER)
delimiter=DELIMITER, header=HEADER, parse_dates=parse_dates)
elif (FORMAT == 'netCDF4'):
dinput = pyTMD.spatial.from_netCDF4(input_file, timename=VARIABLES[0],
xname=VARIABLES[2], yname=VARIABLES[1], varname=VARIABLES[3])
Expand Down Expand Up @@ -217,9 +229,15 @@ def compute_LPET_elevations(input_file, output_file,
else:
leap_seconds = 0.0

# convert time from units to days since 1992-01-01T00:00:00 (UTC)
tide_time = pyTMD.time.convert_delta_time(delta_time-leap_seconds,
epoch1=epoch1, epoch2=(1992,1,1,0,0,0), scale=1.0/86400.0)
if (TIME_STANDARD.lower() == 'datetime'):
# convert delta time array from datetime object
# to days relative to 1992-01-01T00:00:00
tide_time = pyTMD.time.convert_datetime(delta_time,
epoch=(1992,1,1,0,0,0))/86400.0
else:
# convert time from units to days since 1992-01-01T00:00:00 (UTC)
tide_time = pyTMD.time.convert_delta_time(delta_time-leap_seconds,
epoch1=epoch1, epoch2=(1992,1,1,0,0,0), scale=1.0/86400.0)
# interpolate delta times from calendar dates to tide time
delta_file = pyTMD.utilities.get_data_path(['data','merged_deltat.data'])
deltat = calc_delta_time(delta_file, tide_time)
Expand All @@ -238,7 +256,8 @@ def compute_LPET_elevations(input_file, output_file,
# output to file
output = dict(time=tide_time,lon=lon,lat=lat,tide_lpe=tide_lpe)
if (FORMAT == 'csv'):
pyTMD.spatial.to_ascii(output, attrib, output_file, delimiter=',',
pyTMD.spatial.to_ascii(output, attrib, output_file,
delimiter=DELIMITER, header=False,
columns=['time','lat','lon','tide_lpe'])
elif (FORMAT == 'netCDF4'):
pyTMD.spatial.to_netCDF4(output, attrib, output_file)
Expand Down Expand Up @@ -279,6 +298,10 @@ def arguments():
parser.add_argument('--header','-H',
type=int, default=0,
help='Number of header lines for csv files')
# delimiter for csv or ascii files
parser.add_argument('--delimiter',
type=str, default=',',
help='Delimiter for csv or ascii files')
# input data type
# drift: drift buoys or satellite/airborne altimetry (time per data point)
# grid: spatial grids or images (single time for all data points)
Expand All @@ -297,7 +320,7 @@ def arguments():
help='Input delta time for files without date variables')
# input time standard definition
parser.add_argument('--standard','-s',
type=str, choices=('UTC','GPS','TAI','LORAN'), default='UTC',
type=str, choices=('UTC','GPS','TAI','LORAN','datetime'), default='UTC',
help='Input time standard for delta times')
# spatial projection (EPSG code or PROJ4 string)
parser.add_argument('--projection','-P',
Expand Down Expand Up @@ -328,11 +351,18 @@ def main():
args.outfile = '{0}_{1}{2}'.format(*vars)

# run long period equilibrium tide program for input file
compute_LPET_elevations(args.infile, args.outfile, FORMAT=args.format,
VARIABLES=args.variables, HEADER=args.header, TYPE=args.type,
TIME_UNITS=args.epoch, TIME=args.deltatime,
TIME_STANDARD=args.standard, PROJECTION=args.projection,
VERBOSE=args.verbose, MODE=args.mode)
compute_LPET_elevations(args.infile, args.outfile,
FORMAT=args.format,
VARIABLES=args.variables,
HEADER=args.header,
DELIMITER=args.delimiter,
TYPE=args.type,
TIME_UNITS=args.epoch,
TIME=args.deltatime,
TIME_STANDARD=args.standard,
PROJECTION=args.projection,
VERBOSE=args.verbose,
MODE=args.mode)

# run main program
if __name__ == '__main__':
Expand Down
Loading

0 comments on commit d0a18f3

Please sign in to comment.