Skip to content

Commit

Permalink
feat: ascii file read updates to address #112 (#113)
Browse files Browse the repository at this point in the history
feat: added delimiter option and datetime parsing in spatial class
  • Loading branch information
tsutterley authored Oct 3, 2022
1 parent b7d813b commit d0a18f3
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 95 deletions.
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_LPET_elevations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Calling Sequence
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_LPT_displacements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ compute_LPT_displacements.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_OPT_displacements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ compute_OPT_displacements.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_tidal_currents.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ compute_tidal_currents.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
1 change: 1 addition & 0 deletions doc/source/api_reference/compute_tidal_elevations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ compute_tidal_elevations.py
* ``'GPS'``: GPS Time
* ``'LORAN'``: Long Range Navigator Time
* ``'TAI'``: International Atomic Time
* ``'datetime'``: formatted datetime string in UTC

--projection : @after
* ``4326``: latitude and longitude coordinates on WGS84 reference ellipsoid
Expand Down
47 changes: 34 additions & 13 deletions pyTMD/spatial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
spatial.py
Written by Tyler Sutterley (06/2022)
Written by Tyler Sutterley (10/2022)
Utilities for reading, writing and operating on spatial data
Expand All @@ -19,6 +19,7 @@
https://github.com/yaml/pyyaml
UPDATE HISTORY:
Updated 10/2022: added datetime parser for ascii time columns
Updated 06/2022: added field_mapping options to netCDF4 and HDF5 reads
added from_file wrapper function to read from particular formats
Updated 04/2022: add option to reduce input GDAL raster datasets
Expand Down Expand Up @@ -56,6 +57,7 @@
import datetime
import warnings
import numpy as np
import dateutil.parser
# attempt imports
try:
import osgeo.gdal, osgeo.osr, osgeo.gdalconst
Expand Down Expand Up @@ -165,13 +167,19 @@ def from_ascii(filename, **kwargs):
file compression type
columns: list, default ['time','y','x','data']
column names of ascii file
delimiter: str, default ','
    delimiter for csv or ascii files
header: int, default 0
header lines to skip from start of file
parse_dates: bool, default False
Try parsing the time column
"""
# set default keyword arguments
kwargs.setdefault('compression',None)
kwargs.setdefault('columns',['time','y','x','data'])
kwargs.setdefault('delimiter',',')
kwargs.setdefault('header',0)
kwargs.setdefault('parse_dates',False)
# print filename
logging.info(filename)
# get column names
Expand Down Expand Up @@ -218,30 +226,43 @@ def from_ascii(filename, **kwargs):
dinput['attributes'] = YAML_HEADER['header']['global_attributes']
# allocate for each variable and copy variable attributes
for c in columns:
dinput[c] = np.zeros((file_lines-count))
if (c == 'time') and kwargs['parse_dates']:
dinput[c] = np.zeros((file_lines-count),dtype='datetime64[ms]')
else:
dinput[c] = np.zeros((file_lines-count))
dinput['attributes'][c] = YAML_HEADER['header']['variables'][c]
# update number of file lines to skip for reading data
header = int(count)
else:
# output spatial data and attributes
dinput = {c:np.zeros((file_lines-kwargs['header'])) for c in columns}
dinput['attributes'] = {c:dict() for c in columns}
# allocate for each variable and variable attributes
dinput = {}
header = int(kwargs['header'])
for c in columns:
if (c == 'time') and kwargs['parse_dates']:
dinput[c] = np.zeros((file_lines-header),dtype='datetime64[ms]')
else:
dinput[c] = np.zeros((file_lines-header))
dinput['attributes'] = {c:dict() for c in columns}
# extract spatial data array
# for each line in the file
for i,line in enumerate(file_contents[header:]):
# extract columns of interest and assign to dict
# convert fortran exponentials if applicable
column = {c:r.replace('D','E') for c,r in zip(columns,rx.findall(line))}
if kwargs['delimiter']:
column = {c:l.replace('D','E') for c,l in zip(columns,line.split(kwargs['delimiter']))}
else:
column = {c:r.replace('D','E') for c,r in zip(columns,rx.findall(line))}
# copy variables from column dict to output dictionary
for c in columns:
dinput[c][i] = np.float64(column[c])
if (c == 'time') and kwargs['parse_dates']:
dinput[c][i] = dateutil.parser.parse(column[c])
else:
dinput[c][i] = np.float64(column[c])
# convert to masked array if fill values
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].mask = np.zeros_like(dinput['data'],dtype=bool)
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask[:] = (dinput['data'].data == dinput['data'].fill_value)
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
# return the spatial variables
return dinput

Expand Down Expand Up @@ -340,7 +361,7 @@ def from_netCDF4(filename, **kwargs):
srs.ImportFromWkt(dinput['attributes']['crs']['crs_wkt'])
dinput['attributes']['projection'] = srs.ExportToProj4()
# convert to masked array if fill values
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
Expand Down Expand Up @@ -445,7 +466,7 @@ def from_HDF5(filename, **kwargs):
srs.ImportFromWkt(dinput['attributes']['crs']['crs_wkt'])
dinput['attributes']['projection'] = srs.ExportToProj4()
# convert to masked array if fill values
if '_FillValue' in dinput['attributes']['data'].keys():
if 'data' in dinput.keys() and '_FillValue' in dinput['attributes']['data'].keys():
dinput['data'] = np.ma.asarray(dinput['data'])
dinput['data'].fill_value = dinput['attributes']['data']['_FillValue']
dinput['data'].mask = (dinput['data'].data == dinput['data'].fill_value)
Expand Down
62 changes: 46 additions & 16 deletions scripts/compute_LPET_elevations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
compute_LPET_elevations.py
Written by Tyler Sutterley (04/2022)
Written by Tyler Sutterley (10/2022)
Calculates long-period equilibrium tidal elevations for an input file
INPUTS:
Expand All @@ -20,18 +20,20 @@
for csv files: the order of the columns within the file
for HDF5 and netCDF4 files: time, y, x and data variable names
-H X, --header X: number of header lines for csv files
--delimiter X: delimiter for csv or ascii files
-t X, --type X: input data type
drift: drift buoys or satellite/airborne altimetry (time per data point)
grid: spatial grids or images (single time for all data points)
-e X, --epoch X: Reference epoch of input time (default Modified Julian Day)
days since 1858-11-17T00:00:00
-d X, --deltatime X: Input delta time for files without date information
can be set to 0 to use exact calendar date from epoch
-s X, --standard X: Input time standard for delta times
-s X, --standard X: Input time standard for delta times or input time type
UTC: Coordinate Universal Time
GPS: GPS Time
LORAN: Long Range Navigator Time
TAI: International Atomic Time
datetime: formatted datetime string in UTC
-P X, --projection X: spatial projection as EPSG code or PROJ4 string
4326: latitude and longitude coordinates on WGS84 reference ellipsoid
-V, --verbose: Verbose output of processing run
Expand Down Expand Up @@ -62,6 +64,7 @@
compute_equilibrium_tide.py: calculates long-period equilibrium ocean tides
UPDATE HISTORY:
Updated 10/2022: added delimiter option and datetime parsing for ascii files
Updated 04/2022: use argparse descriptions within documentation
Updated 01/2022: added option for changing the time standard
Updated 11/2021: add function for attempting to extract projection
Expand Down Expand Up @@ -114,9 +117,17 @@ def get_projection(attributes, PROJECTION):
# PURPOSE: read csv, netCDF or HDF5 data
# compute long-period equilibrium tides at points and times
def compute_LPET_elevations(input_file, output_file,
FORMAT='csv', VARIABLES=['time','lat','lon','data'], HEADER=0, TYPE='drift',
TIME_UNITS='days since 1858-11-17T00:00:00', TIME_STANDARD='UTC',
TIME=None, PROJECTION='4326', VERBOSE=False, MODE=0o775):
FORMAT='csv',
VARIABLES=['time','lat','lon','data'],
HEADER=0,
DELIMITER=',',
TYPE='drift',
TIME_UNITS='days since 1858-11-17T00:00:00',
TIME_STANDARD='UTC',
TIME=None,
PROJECTION='4326',
VERBOSE=False,
MODE=0o775):

# create logger for verbosity level
loglevel = logging.INFO if VERBOSE else logging.CRITICAL
Expand Down Expand Up @@ -149,8 +160,9 @@ def compute_LPET_elevations(input_file, output_file,

# read input file to extract time, spatial coordinates and data
if (FORMAT == 'csv'):
parse_dates = (TIME_STANDARD.lower() == 'datetime')
dinput = pyTMD.spatial.from_ascii(input_file, columns=VARIABLES,
header=HEADER)
delimiter=DELIMITER, header=HEADER, parse_dates=parse_dates)
elif (FORMAT == 'netCDF4'):
dinput = pyTMD.spatial.from_netCDF4(input_file, timename=VARIABLES[0],
xname=VARIABLES[2], yname=VARIABLES[1], varname=VARIABLES[3])
Expand Down Expand Up @@ -217,9 +229,15 @@ def compute_LPET_elevations(input_file, output_file,
else:
leap_seconds = 0.0

# convert time from units to days since 1992-01-01T00:00:00 (UTC)
tide_time = pyTMD.time.convert_delta_time(delta_time-leap_seconds,
epoch1=epoch1, epoch2=(1992,1,1,0,0,0), scale=1.0/86400.0)
if (TIME_STANDARD.lower() == 'datetime'):
# convert delta time array from datetime object
# to days relative to 1992-01-01T00:00:00
tide_time = pyTMD.time.convert_datetime(delta_time,
epoch=(1992,1,1,0,0,0))/86400.0
else:
# convert time from units to days since 1992-01-01T00:00:00 (UTC)
tide_time = pyTMD.time.convert_delta_time(delta_time-leap_seconds,
epoch1=epoch1, epoch2=(1992,1,1,0,0,0), scale=1.0/86400.0)
# interpolate delta times from calendar dates to tide time
delta_file = pyTMD.utilities.get_data_path(['data','merged_deltat.data'])
deltat = calc_delta_time(delta_file, tide_time)
Expand All @@ -238,7 +256,8 @@ def compute_LPET_elevations(input_file, output_file,
# output to file
output = dict(time=tide_time,lon=lon,lat=lat,tide_lpe=tide_lpe)
if (FORMAT == 'csv'):
pyTMD.spatial.to_ascii(output, attrib, output_file, delimiter=',',
pyTMD.spatial.to_ascii(output, attrib, output_file,
delimiter=DELIMITER, header=False,
columns=['time','lat','lon','tide_lpe'])
elif (FORMAT == 'netCDF4'):
pyTMD.spatial.to_netCDF4(output, attrib, output_file)
Expand Down Expand Up @@ -279,6 +298,10 @@ def arguments():
parser.add_argument('--header','-H',
type=int, default=0,
help='Number of header lines for csv files')
# delimiter for csv or ascii files
parser.add_argument('--delimiter',
type=str, default=',',
help='Delimiter for csv or ascii files')
# input data type
# drift: drift buoys or satellite/airborne altimetry (time per data point)
# grid: spatial grids or images (single time for all data points)
Expand All @@ -297,7 +320,7 @@ def arguments():
help='Input delta time for files without date variables')
# input time standard definition
parser.add_argument('--standard','-s',
type=str, choices=('UTC','GPS','TAI','LORAN'), default='UTC',
type=str, choices=('UTC','GPS','TAI','LORAN','datetime'), default='UTC',
help='Input time standard for delta times')
# spatial projection (EPSG code or PROJ4 string)
parser.add_argument('--projection','-P',
Expand Down Expand Up @@ -328,11 +351,18 @@ def main():
args.outfile = '{0}_{1}{2}'.format(*vars)

# run long period equilibrium tide program for input file
compute_LPET_elevations(args.infile, args.outfile, FORMAT=args.format,
VARIABLES=args.variables, HEADER=args.header, TYPE=args.type,
TIME_UNITS=args.epoch, TIME=args.deltatime,
TIME_STANDARD=args.standard, PROJECTION=args.projection,
VERBOSE=args.verbose, MODE=args.mode)
compute_LPET_elevations(args.infile, args.outfile,
FORMAT=args.format,
VARIABLES=args.variables,
HEADER=args.header,
DELIMITER=args.delimiter,
TYPE=args.type,
TIME_UNITS=args.epoch,
TIME=args.deltatime,
TIME_STANDARD=args.standard,
PROJECTION=args.projection,
VERBOSE=args.verbose,
MODE=args.mode)

# run main program
if __name__ == '__main__':
Expand Down
Loading

0 comments on commit d0a18f3

Please sign in to comment.