Skip to content

Commit

Permalink
Add ability to process stacked data
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Pechnikov committed Jul 5, 2024
1 parent a02fecb commit d927200
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 68 deletions.
33 changes: 22 additions & 11 deletions pygmtsar/pygmtsar/IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,10 +347,6 @@ def open_cube(self, name):

# Workaround: open the dataset without chunking
data = xr.open_dataset(filename, engine=self.netcdf_engine)
# Determine the proper chunk sizes
chunks = {dim: 1 if dim in ['pair', 'date'] else self.chunksize for dim in data.dims}
# Re-chunk the dataset using the chunks dictionary
data = data.chunk(chunks)

if 'stack' in data.dims:
if 'y' in data.coords and 'x' in data.coords:
Expand All @@ -359,6 +355,13 @@ def open_cube(self, name):
multi_index_names = ['lat', 'lon']
multi_index = pd.MultiIndex.from_arrays([data.y.values, data.x.values], names=multi_index_names)
data = data.assign_coords(stack=multi_index)
chunksize = self.chunksize1d
else:
chunksize = self.chunksize

# set the proper chunk sizes
chunks = {dim: 1 if dim in ['pair', 'date'] else chunksize for dim in data.dims}
data = data.chunk(chunks)

# attributes are empty when dataarray is prezented as dataset
# revert dataarray converted to dataset
Expand Down Expand Up @@ -457,9 +460,12 @@ def save_cube(self, data, name=None, caption='Saving NetCDF 2D/3D Dataset'):
elif name is None:
raise ValueError('Specify name for the output NetCDF file')

chunksize = None
if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
# replace multiindex by sequential numbers 0,1,...
data = data.reset_index('stack')
# single-dimensional data compression required
chunksize = self.netcdf_chunksize1d

for dim in ['y', 'x', 'lat', 'lon']:
if dim in data.dims:
Expand All @@ -477,7 +483,8 @@ def save_cube(self, data, name=None, caption='Saving NetCDF 2D/3D Dataset'):
data = data.to_dataset().assign_attrs({'dataarray': data.name})

is_dask = isinstance(data[list(data.data_vars)[0]].data, dask.array.Array)
encoding = {varname: self._compression(data[varname].shape) for varname in data.data_vars}
encoding = {varname: self._compression(data[varname].shape, chunksize=chunksize) for varname in data.data_vars}
#print ('save_cube encoding', encoding)
#print ('is_dask', is_dask, 'encoding', encoding)

# save to NetCDF file
Expand Down Expand Up @@ -550,18 +557,22 @@ def open_stack(self, name, stack=None):
concat_dim='stackvar',
combine='nested'
)
# Determine the proper chunk sizes
chunks = {dim: 1 if dim in ['pair', 'date'] else self.chunksize for dim in data.dims}
data = data.chunk(chunks)


if 'stack' in data.dims:
if 'y' in data.coords and 'x' in data.coords:
multi_index_names = ['y', 'x']
elif 'lat' in data.coords and 'lon' in data.coords:
multi_index_names = ['lat', 'lon']
multi_index = pd.MultiIndex.from_arrays([data.y.values, data.x.values], names=multi_index_names)
data = data.assign_coords(stack=multi_index)

chunksize = self.chunksize1d
else:
chunksize = self.chunksize

# set the proper chunk sizes
chunks = {dim: 1 if dim in ['stackvar'] else chunksize for dim in data.dims}
data = data.chunk(chunks)

# revert dataarray converted to dataset
data_vars = list(data.data_vars)
if len(data_vars) == 1 and 'dataarray' in data.attrs:
Expand Down Expand Up @@ -734,7 +745,7 @@ def save_stack(self, data, name, caption='Saving 2D Stack', queue=None, timeout=
if isinstance(data, xr.DataArray):
data = data.to_dataset().assign_attrs({'dataarray': data.name})
encoding = {varname: self._compression(data[varname].shape[1:]) for varname in data.data_vars}
#print ('encoding', encoding)
#print ('save_stack encoding', encoding)

# Applying iterative processing to prevent Dask scheduler deadlocks.
counter = 0
Expand Down
41 changes: 36 additions & 5 deletions pygmtsar/pygmtsar/Stack_detrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,18 @@ def polyfit(self, data, weight=None, degree=0, days=None, count=None):
warnings.filterwarnings('ignore', module='dask')
warnings.filterwarnings('ignore', module='dask.core')

multi_index = None
if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
multi_index = data['stack']
data = data.reset_index('stack')
if weight is not None:
if not ('stack' in weight.dims and isinstance(weight.coords['stack'].to_index(), pd.MultiIndex)):
raise ValueError('ERROR: "weight", if provided, must be stacked consistently with "data".')
data = data.reset_index('stack')
else:
if 'stack' in weight.dims and isinstance(weight.coords['stack'].to_index(), pd.MultiIndex):
raise ValueError('ERROR: "weight", if provided, must be stacked consistently with "data".')

pairs, dates = self.get_pairs(data, dates=True)

models = []
Expand Down Expand Up @@ -362,8 +374,10 @@ def polyfit(self, data, weight=None, degree=0, days=None, count=None):

out = xr.concat([(model.sel(date=ref).drop('date') - model.sel(date=rep).drop('date'))\
.assign_coords(pair=str(ref.date()) + ' ' + str(rep.date()), ref=ref, rep=rep) \
for ref, rep in zip(pairs['ref'], pairs['rep'])], dim='pair')
return out.rename(data.name)
for ref, rep in zip(pairs['ref'], pairs['rep'])], dim='pair').rename(data.name)
if multi_index is not None:
return out.assign_coords(stack=multi_index)
return out

# def polyfit(self, data, weight=None, degree=0, variable=None, count=None):
# import xarray as xr
Expand Down Expand Up @@ -872,6 +886,8 @@ def turbulence(self, phase, weight=None):
import xarray as xr
import pandas as pd

print ('NOTE: this function is deprecated, use instead Stack.polyfit()')

pairs, dates = self.get_pairs(phase, dates=True)

turbos = []
Expand Down Expand Up @@ -904,14 +920,22 @@ def turbulence(self, phase, weight=None):
return phase_turbo.rename('turbulence')

def velocity(self, data):
import pandas as pd
import numpy as np
#years = ((data.date.max() - data.date.min()).dt.days/365.25).item()
#nanoseconds = (data.date.max().astype(int) - data.date.min().astype(int)).item()
#print ('years', np.round(years, 3), 'nanoseconds', nanoseconds)
multi_index = None
if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
multi_index = data.coords['stack']
# replace multiindex by sequential numbers 0,1,...
data = data.reset_index('stack')
#velocity = nanoseconds*data.polyfit('date', 1).polyfit_coefficients.sel(degree=1)/years
nanoseconds_per_year = 365.25*24*60*60*1e9
# calculate slope per year
velocity = nanoseconds_per_year*data.polyfit('date', 1).polyfit_coefficients.sel(degree=1).astype(np.float32).rename('trend')
if multi_index is not None:
return velocity.assign_coords(stack=multi_index)
return velocity

# def trend(self, data, deg=1):
Expand All @@ -927,7 +951,12 @@ def trend(self, data, dim='auto', deg=1):
import xarray as xr
import pandas as pd
import numpy as np


multi_index = None
if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
multi_index = data['stack']
data = data.reset_index('stack')

stackdim = [_dim for _dim in ['date', 'pair'] if _dim in data.dims]
if len(stackdim) != 1:
raise ValueError("The 'data' argument must include a 'date' or 'pair' dimension to detect trends.")
Expand All @@ -951,5 +980,7 @@ def trend(self, data, dim='auto', deg=1):
# fit existing coordinate values
fit_coeff = data.polyfit(dim, deg).polyfit_coefficients.astype(np.float32)
fit = xr.polyval(data[dim], fit_coeff).astype(np.float32).rename('trend')
return xr.merge([fit, fit_coeff]).rename(polyfit_coefficients='coefficients')

out = xr.merge([fit, fit_coeff]).rename(polyfit_coefficients='coefficients')
if multi_index is not None:
return out.assign_coords(stack=multi_index)
return out
30 changes: 29 additions & 1 deletion pygmtsar/pygmtsar/Stack_phasediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,8 +848,12 @@ def plot_phase(self, data, caption='Phase, [rad]',
quantile=None, vmin=None, vmax=None, symmetrical=False,
cmap='turbo', aspect=None, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if quantile is not None:
assert vmin is None and vmax is None, "ERROR: arguments 'quantile' and 'vmin', 'vmax' cannot be used together"

Expand All @@ -873,8 +877,12 @@ def plot_phase(self, data, caption='Phase, [rad]',
def plot_phases(self, data, caption='Phase, [rad]', cols=4, size=4, nbins=5, aspect=1.2, y=1.05,
quantile=None, vmin=None, vmax=None, symmetrical=False, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if quantile is not None:
assert vmin is None and vmax is None, "ERROR: arguments 'quantile' and 'vmin', 'vmax' cannot be used together"

Expand Down Expand Up @@ -903,8 +911,12 @@ def plot_phases(self, data, caption='Phase, [rad]', cols=4, size=4, nbins=5, asp

def plot_interferogram(self, data, caption='Phase, [rad]', cmap='gist_rainbow_r', aspect=None, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

plt.figure()
data.plot.imshow(vmin=-np.pi, vmax=np.pi, cmap=cmap)
self.plot_AOI(**kwargs)
Expand All @@ -915,8 +927,12 @@ def plot_interferogram(self, data, caption='Phase, [rad]', cmap='gist_rainbow_r'

def plot_interferograms(self, data, caption='Phase, [rad]', cols=4, size=4, nbins=5, aspect=1.2, y=1.05, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

# multi-plots ineffective for linked lazy data
fg = data.plot.imshow(
col='pair',
Expand All @@ -932,8 +948,12 @@ def plot_interferograms(self, data, caption='Phase, [rad]', cols=4, size=4, nbin
self.plots_POI(fg, **kwargs)

def plot_correlation(self, data, caption='Correlation', cmap='gray', aspect=None, **kwargs):
import pandas as pd
import matplotlib.pyplot as plt

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

plt.figure()
data.plot.imshow(vmin=0, vmax=1, cmap=cmap)
self.plot_AOI(**kwargs)
Expand All @@ -943,9 +963,13 @@ def plot_correlation(self, data, caption='Correlation', cmap='gray', aspect=None
plt.title(caption)

def plot_correlations(self, data, caption='Correlation', cmap='auto', cols=4, size=4, nbins=5, aspect=1.2, y=1.05, **kwargs):
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if isinstance(cmap, str) and cmap == 'auto':
cmap = mcolors.LinearSegmentedColormap.from_list(
name='custom_gray',
Expand All @@ -968,9 +992,13 @@ def plot_correlations(self, data, caption='Correlation', cmap='auto', cols=4, si

def plot_correlation_stack(self, data, threshold=None, caption='Correlation Stack', bins=100, cmap='auto', **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors


if 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if isinstance(cmap, str) and cmap == 'auto':
cmap = mcolors.LinearSegmentedColormap.from_list(
name='custom_gray',
Expand Down
6 changes: 6 additions & 0 deletions pygmtsar/pygmtsar/Stack_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,13 @@ def psfunction(self, ps='auto', name='ps'):

def plot_psfunction(self, data='auto', caption='PS Function', cmap='gray', quantile=None, vmin=None, vmax=None, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if isinstance(data, str) and data == 'auto':
data = self.psfunction()
elif 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if quantile is not None:
assert vmin is None and vmax is None, "ERROR: arguments 'quantile' and 'vmin', 'vmax' cannot be used together"
Expand Down Expand Up @@ -155,6 +158,7 @@ def plot_amplitudes(self, dates=None, data='auto', norm='auto', func=None, inten
caption='auto', cmap='gray', cols=4, size=4, nbins=5, aspect=1.2, y=1.05,
quantile=None, vmin=None, vmax=None, symmetrical=False, **kwargs):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import types

Expand All @@ -166,6 +170,8 @@ def plot_amplitudes(self, dates=None, data='auto', norm='auto', func=None, inten
data = np.abs(self.open_data(dates=dates, scale=scale))
if intensity:
data = np.square(data)
elif 'stack' in data.dims and isinstance(data.coords['stack'].to_index(), pd.MultiIndex):
data = data.unstack('stack')

if func is not None:
data = func(data)
Expand Down
Loading

0 comments on commit d927200

Please sign in to comment.