diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 17f3ddd1..a1a5c3f0 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,8 +1,13 @@ version: 2 +build: + os: "ubuntu-22.04" + tools: + python: "mambaforge-22.9" + sphinx: builder: html - configuration: ./docs/conf.py + configuration: docs/conf.py conda: - environment: ./docs/environment.yml + environment: docs/environment.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aebbd66..86a944ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,62 @@ # CHANGELOG +## v0.3.0 + +>__Note__: The motivation behind the changes in v0.3.0 was that the original +> data gathering setup used by Pyatoa was very abstract, opaque, and +> unnecessarily rigid (e.g., building path strings out of various components of +> filenames and internal attributes). The new approach to data gathering is to +> use PySEP to perform all data gathering once and for all, including one-time +> tasks like instrument removal. The resulting SAC files can then be read in +> with ObsPy and directly fed into the Manager class for misfit quantification. +> This also gives the User much more control over their data gathering without +> getting confused by Pyatoa's internal data gathering system. + +- Removed the ``pyatoa.core.gatherer.Gatherer`` class from the package entirely; + all data gathering capabilities have been migrated to PySEP, and Pyatoa will + now only accept input data as already-defined ObsPy objects +- Removed Gatherer-related tests and documentation from the package +- Removed the ``paths`` attribute from ``pyatoa.core.config.Config`` and all + references to the paths attribute throughout the package, as these were only + accessed by the now-removed ``Gatherer`` class +- Changed the Pyflex and Pyadjoint configuration building procedure in + ``pyatoa.core.config.Config``, as it was previously abstracted behind a few + unnecessary functions. ``Config`` now accepts parameters ``pyflex_parameters`` + and ``pyadjoint_parameters`` (dictionaries) that overwrite default Config + parameters in the underlying Config objects (see the sketch after this list) +- Changed ``pyatoa.core.manager.Manager.write()`` to ``write_to_dataset`` to be + clearer in explaining its role +- Exposed the default preprocessing procedures directly in the + ``Manager.preprocess`` function, rather than having them hidden behind a + function call to a utility script. Users who want to overwrite the + preprocessing need only skip the call to preprocess and perform their own + tasks on the internally defined ``st_obs`` and ``st_syn`` attributes. +- Removed ``pyatoa.core.manager.Manager``'s ability to save to an ASDFDataSet mid + workflow (i.e., during window and measure). Manager must now use the + ``write_to_dataset`` function if it wants to save data to an ASDFDataSet +- Removed the ``pyatoa/plugins`` directory, which only contained the pyflex + preset dictionaries. These were not very flexible; instead they have been + converted to a docs page for easier accessibility. +- Created a Docs page for Pyflex presets that can be copy-pasted into misfit + quantification routines +- Added a ``plt.close('all')`` to the end of the Manager's plot routine + as a final precaution against leaving an excessive number of Matplotlib + figures open +- Overhauled ``pyatoa.core.manager.Manager.flow_multiband`` to mimic + the standard behavior of ``Manager.flow``, that is: return internal attributes + ``windows`` and ``adjsrcs``, which are component-wise dictionaries that each + contain Pyflex Windows and Pyadjoint AdjointSource objects, respectively.
 + Previously this function returned dictionaries of dictionaries which needed + to be further manipulated; now the function averages all adjoint sources + from all period bands, and also collects all windows. +- Adjusted and fixed tests based on all the above changes.
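A minimal sketch of the v0.3.0 interface described above. It assumes data have already been gathered and saved to disk (e.g., by PySEP); the file names are placeholders, and the ``taper_percentage`` and ``periods`` arguments are illustrative assumptions rather than confirmed parameter names, so consult the Pyflex and Pyadjoint docs for valid keys.

```python
# Sketch of the v0.3.0 workflow: data come in as pre-defined ObsPy objects,
# and Pyflex/Pyadjoint overrides are passed to Config as plain dictionaries
from obspy import read, read_events, read_inventory
from pyatoa import Config, Manager

cfg = Config(
    min_period=10, max_period=30, adj_src_type="cc_traveltime",
    pyflex_parameters={"stalta_waterlevel": 0.08},   # valid pyflex.Config key
    pyadjoint_parameters={"taper_percentage": 0.3},  # assumed pyadjoint key
)

# No internal gathering: Event, Inventory and Streams are read in directly
mgmt = Manager(config=cfg,
               event=read_events("event.xml")[0],
               inv=read_inventory("station.xml"),
               st_obs=read("SAC/*.sac"),     # e.g., PySEP output
               st_syn=read("synthetics/*"))

mgmt.standardize()
mgmt.preprocess()  # or skip this and filter mgmt.st_obs / mgmt.st_syn yourself
mgmt.window()
mgmt.measure()

# flow_multiband now returns the component-wise dictionaries directly;
# the `periods` keyword name here is an assumption for illustration:
# windows, adjsrcs = mgmt.flow_multiband(periods=[(4, 30), (10, 30)])
```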
+ +## v0.2.2 + +- Bugfix: Gatherer attempting to access a removed Config parameter +- Resolve PyPDF2 -> PyPDF dependency deprecation warning +- Bugfix: Manager.standardize() only resamples if required, otherwise small time shifting is introduced (Issue \#34) + ## v0.2.1 - Updated internal call structures to deal with Pyadjoint v0.2.1 API changes diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index d0ae7bfc..3f2e4348 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -1 +1,2 @@ Chow, Bryant +Örsvuran, Ridvan diff --git a/docs/README.md b/docs/README.md index 4b1e6c76..2660e50a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -13,13 +13,16 @@ In order to build the Docs locally, you will first need to create a separate Conda environment with a few packages, you can do this by running: ``` bash -conda env create --file environment.yaml +conda env create --file environment_local.yml conda activate pyatoa-docs ``` You can then run the make command to generate the .html files. You can find your local docs in the *_build/html* directory +Note that the file ``environment.yml`` is used for building docs on ReadTheDocs, +but does not contain all required dependencies to build locally. + ```bash make html ``` diff --git a/docs/changelog.rst b/docs/changelog.rst index 26cdb4dd..261ed825 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,8 +1,85 @@ -Change Log -============== - -Version 0.2.0 -~~~~~~~~~~~~~~~ +Changelog +========= + +v0.3.0 +------ + + **Note**: The motivation behind the changes in v0.3.0 was that the + original data gathering setup used by Pyatoa was very abstract, + opaque, and unnecessarily rigid (e.g., building path strings out of + various components of filenames and internal attributes). The new + approach to data gathering is to use PySEP to perform all data + gathering once and for all, including one-time tasks like instrument + removal. The resulting SAC files can then be read in with ObsPy and + directly fed into the Manager class for misfit quantification. This + also gives the User much more control over their data gathering + without getting confused by Pyatoa’s internal data gathering system. + +- Removed the ``pyatoa.core.gatherer.Gatherer`` class from the package + entirely; all data gathering capabilities have been migrated to + PySEP, and Pyatoa will now only accept input data as already-defined + ObsPy objects +- Removed Gatherer-related tests and documentation from the package +- Removed the ``paths`` attribute from ``pyatoa.core.config.Config`` and + all references to the paths attribute throughout the package, as these + were only accessed by the now-removed ``Gatherer`` class +- Changed the Pyflex and Pyadjoint configuration building procedure in + ``pyatoa.core.config.Config``, as it was previously abstracted behind + a few unnecessary functions.
``Config`` now accepts parameters + ``pyflex_parameters`` and ``pyadjoint_parameters`` (dictionaries) + that overwrite default Config parameters in the underlying Config + objects +- Changed ``pyatoa.core.manager.Manager.write()`` to + ``write_to_dataset`` to be clearer in explaining its role +- Exposed the default preprocessing procedures directly in the + ``Manager.preprocess`` function, rather than having them hidden behind + a function call to a utility script. Users who want to overwrite the + preprocessing need only skip the call to preprocess and perform their + own tasks on the internally defined ``st_obs`` and ``st_syn`` + attributes. +- Removed ``pyatoa.core.manager.Manager``\ ’s ability to save to an + ASDFDataSet mid workflow (i.e., during window and measure). Manager + must now use the ``write_to_dataset`` function if it wants to save + data to an ASDFDataSet +- Removed the ``pyatoa/plugins`` directory, which only contained the + pyflex preset dictionaries. These were not very flexible; instead + they have been converted to a docs page for easier accessibility. +- Created a Docs page for Pyflex presets that can be copy-pasted into + misfit quantification routines +- Added a ``plt.close('all')`` to the end of the Manager’s plot routine + as a final precaution against leaving an excessive number of + Matplotlib figures open +- Overhauled ``pyatoa.core.manager.Manager.flow_multiband`` to mimic + the standard behavior of ``Manager.flow``, that is: return + internal attributes ``windows`` and ``adjsrcs``, which are + component-wise dictionaries that each contain Pyflex Windows and + Pyadjoint AdjointSource objects, respectively. Previously this + function returned dictionaries of dictionaries which needed to be + further manipulated; now the function averages all adjoint sources + from all period bands, and also collects all windows. +- Adjusted and fixed tests based on all the above changes. + +v0.2.2 +------ + +- Bugfix: Gatherer attempting to access a removed Config parameter +- Resolve PyPDF2 -> PyPDF dependency deprecation warning +- Bugfix: Manager.standardize() only resamples if required, otherwise + small time shifting is introduced (Issue #34) + +v0.2.1 +------ + +- Updated internal call structures to deal with Pyadjoint v0.2.1 API + changes +- Changed internal test ASDFDataSet and created a script to generate + new dataset because the old one had no way of being remade. +- New Docs + Example + Example data: Processing data with Pyatoa and + MPI +- Remove GitHub Pip install links for PySEP, Pyflex and Pyadjoint + +v0.2.0 +------ - Renamed 'Quickstart' doc to 'A short example', created a new 'Quickstart' doc which has a short code snippet that creates a figure. - Revamped documentation, switched to new style of building documentation using only .rst files (rather than building off of Jupyter notebooks directly in RTD, which was high in memory consumption) diff --git a/docs/conf.py b/docs/conf.py index 8309cae7..b9c9e061 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,13 @@ author = 'adjTomo Dev Team' # The short X.Y version -version = '0.2.2' +# Grab version number from 'pyproject.toml' +with open("../pyproject.toml", "r") as f: + _lines = f.readlines() +for _line in _lines: + if _line.startswith("version"): + version = _line.split('"')[1].strip() + # The full version, including alpha/beta/rc tags release = '' @@ -75,7 +81,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files.
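An aside on the version-parsing hunk in ``docs/conf.py`` above: the hand-rolled loop keeps the docs build free of extra TOML dependencies. For reference, a minimal sketch of an equivalent lookup with the standard-library ``tomllib``, assuming Python >= 3.11 is available and that the version string sits under the ``[project]`` table (both are assumptions about this project's setup):

```python
# Sketch only: equivalent version lookup with tomllib (stdlib in Python >= 3.11).
# Assumes pyproject.toml declares `version` under its [project] table.
import tomllib

with open("../pyproject.toml", "rb") as f:  # tomllib requires binary mode
    pyproject = tomllib.load(f)

version = pyproject["project"]["version"]
```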
diff --git a/docs/discovery.rst b/docs/discovery.rst deleted file mode 100644 index 85c48465..00000000 --- a/docs/discovery.rst +++ /dev/null @@ -1,271 +0,0 @@ -Data Discovery -============== - -The :class:`Manager ` class has internal -routines in the :meth:`gather ` function -that automatically discover/gather waveforms and metadata, given a set -of search paths and rules. These routines are defined in the source code by the -:class:`Gatherer ` class. - -This functionality replaces the direct passing of ObsPy objects to the Manager -as described in the `Misfit Quantification `__ documentation. It -is meant to facilitate automated data discovery within larger workflow tools -like `SeisFlows `__. - -Data can also be discovered via ASDFDataSets, which is explained in the -`storage docs page `__. - -.. note:: - - *In a nutshell*: Users provide path and tag information (event id, - station code) and the Gatherer class builds a set of wild card search paths - in which to look for and read corresponding data/metadata. - -Event Metadata -~~~~~~~~~~~~~~ - -Event metadata can be gathered locally via a search path, using an event ID. - -.. code:: python - - from pyatoa import Config, Manager - - event_paths = ["path/to/events", "path/to/other_events"] - - cfg = Config(paths={"events": event_paths}) - mgmt = Manager(config=cfg) - mgmt.gather_events(event_id="001", prefix="CMTSOLUTION_", suffix="test") - -The above code block will attempt to look for events under a specific path -structure: - -.. code:: bash - - {path}/{prefix}{event_id}{suffix} - -Which in this example would be: - -- `path/to/events/CMTSOLUTION_001test` -- `path/to/other_events/CMTSOLUTION_001test` - -The optional ``prefix`` and ``suffix`` allow a User to wrap text around -the ``event_id``. - -The Manager will loop through available paths until a matching file is found. -If no file is found, the Manager will throw a -:class:`pyatoa.core.gatherer.GathererNoDataException`. - -Station Metadata -~~~~~~~~~~~~~~~~ - -Station response information can be gathered via a local search path using the -station and network codes: - -.. code:: python - - from pyatoa import Config, Manager - - response_paths = ["path/to/responses", "path/to/other_responses"] - - cfg = Config(paths={"responses": response_paths}) - mgmt = Manager(config=cfg) - mgmt.gather_stations(code="NZ.BFZ..HH?", loc="", cha="*") - -The above code block will attempt to look for StationXML files under the -following path structure: - -.. code:: bash - - {path}/{net}.{sta}/RESP.{net}.{sta}.{loc}.{cha} - -Which in this example would be be: - -- path/to/responses/NZ.BFZ/RESP.NZ.BFZ..* -- path/to/other_responses/NZ.BFZ/RESP.NZ.BFZ..* - -The optional `loc` and `cha` parameters can be set as specific location and -channel codes, or as wildcards to make the search more general. - -.. note:: - - Users who use `PySEP `__ to gather their - data can automatically output station metadata in this required directory - format. - -Custom Path Structure -````````````````````` - -Users who want to input their own custom path and filename structure for -gathering station metadata can do so. The path can make use of formatting codes -`net`, `sta`, `loc`, and `cha` (network, station, location, channel). - -..
code:: python - - cfg = Config(paths={"responses": ["./"]}) - mgmt.gather_stations(code="NZ.BFZ..HH?", resp_dir_template="", - resp_fid_template="{net}_{sta}_RESPONSE") - -The above code block will attempt to look for StationXML files under the -following path structure: - -.. code:: bash - - {path}//{net}_{sta}_RESPONSE - -Which in this example would be be: - -- `.//NZ_BFZ_RESPONSE` - - -Observed Waveforms -~~~~~~~~~~~~~~~~~~ - -Observed waveforms can be searched for via local paths under specific path -and filenaming structure. Observed waveforms require an event origin time -to search for specific waveform start and end times. - -.. code:: python - - from obspy import UTCDateTime - from pyatoa import Config, Manager - - waveform_paths = ["path/to/waveforms", "path/to/other_waveforms"] - - cfg = Config(paths={"observed": waveform_paths}) - - mgmt = Manager(config=cfg, start_pad=50, end_pad=200) - mgmt.gatherer.origintime = UTCDateTime("2000-01-01T00:00:00") # example - mgmt.gather_observed(code="NZ.BFZ..*") - -The above code block will attempt to look for observed waveform data under -the following path structure: - -.. code:: bash - - {path}/{year}/{network}/{station}/{channel}*/{net}.{sta}.{loc}.{cha}.{year}.{jday:0>3} - -Which in this example would be be: - -- `path/to/waveforms/NZ/BFZ/**/NZ.BFZ..*.2000.001` -- `path/to/waveforms/NZ/BFZ/**/NZ.BFZ..*.1999.365` -- `path/to/other_waveforms/NZ/BFZ/**/NZ.BFZ..*.2000.001` -- `path/to/other_waveforms/NZ/BFZ/**/NZ.BFZ..*.1999.365` - - -The gathering routine uses the -:class:`Config ` attributes -``start_pad`` and ``end_pad`` to define how the search period over time. In this -example, the example origin time is at midnight (00:00:00), so -the Manager knows to look for data on both 2000-01-01 and 1999-12-31. - -.. note:: - - Users who use `PySEP `__ to gather their - data can automatically output observed waveforms in this format. - - -Custom Path Structure -````````````````````` - -Users who want to input their own custom path and filename structure for -gathering station metadata can do so. The path can make use of formatting codes -`net`, `sta`, `loc`, and `cha`, `year` and `jday` (network, station, location, -channel, year, julian day). - -.. code:: python - - cfg = Config(paths={"observed": "./"}) - mgmt = Manager(config=cfg, start_pad=50, end_pad=200) - mgmt.gatherer.origintime = UTCDateTime("2000-01-01T00:00:00") # example - mgmt.gather_observed(code="NZ.BFZ..*", obs_dir_template="", - obs_fid_template="{net}_{sta}_{year}.{jday}.ms") - -The above code block will attempt to look for observed waveforms under the -following path structure: - -.. code:: bash - - {path}/{net}_{sta}_{year}.{jday}.ms - -Which in this example would be be: - -- `./NZ_BFZ_2000.001.ms` -- `./NZ_BFZ_1999.365.ms` - - -Synthetic Waveforms -~~~~~~~~~~~~~~~~~~~ - -Synthetic waveforms can be discovered via local path given a specific event ID. -The default filenaming structure is meant to match synthetics output by -SPECFEM2D/3D/3D_GLOBE. - -.. code:: python - - from pyatoa import Config, Manager - - synthetic_paths = ["path/to/synthetics", "path/to/other_synthetics"] - - cfg = Config(paths={"synthetics": synthetic_paths}) - mgmt = Manager(config=cfg) - mgmt.gather_synthetic(code="NZ.BFZ..HH?", syn_unit="?") - -The above code block will attempt to look for synthetic waveforms under the -following path structure: - -.. 
code:: bash - - {path}/{net}.{sta}.*{cmp}.sem{dva}* - -Which in this example would be be: - -- `path/to/synthetics/NZ.BFZ.*?.sem?*` -- `path/to/other_synthetics/NZ.BFZ.*?.sem?*` - - -Custom Path Structure -````````````````````` - -Users who want to input their own custom path and filename structure for -gathering synthetics can do so. - -.. code:: python - - cfg = Config(paths={"synthetics": "./"}) - mgmt = Manager(config=cfg) - mgmt.gather_synthetic(code="NZ.BFZ..HH?", syn_unit="?", - syn_dir_template="synthetics", - syn_fid_template="{net}_{sta}_{cha}") - -The above code block will attempt to look for synthetic waveforms under the -following path structure: - -.. code:: bash - - {path}/{syn_dir_template}/{syn_fid_template} - -Which in this example would be be: - -- `./synthetics/NZ_BFZ_HH?` - - -Combined Gathering Call -~~~~~~~~~~~~~~~~~~~~~~~ - -Users can chain together all of the above gathering into a single call by -setting paths all together and calling the -:meth:`gather ` function. - -.. code:: python - - cfg = Config(paths={"event": [path_to_events], - "responses": [path_to_responses], - "observed": [path_to_observed], - "synthetics": [path_to_synthetics] - }) - mgmt.gather(event_id="CMTSOlUTION_001", code="NZ.BFZ..HH?", - choice=["event", "inv", "st_obs", "st_syn"]) - -This function will run all gathering functions one after another. If any part -of the gathering call fails, the Manager will throw a -:class:`pyatoa.core.gatherer.GathererNoDataException`. diff --git a/docs/environment.yml b/docs/environment.yml index a627ee2b..7bf33650 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -4,11 +4,7 @@ channels: dependencies: - sphinx - sphinx_rtd_theme - - ipykernel - - nbsphinx - - jupyter - myst-parser - pip - pip: - sphinx-autoapi - - sphinx_rtd_theme diff --git a/docs/environment_local.yml b/docs/environment_local.yml new file mode 100644 index 00000000..a627ee2b --- /dev/null +++ b/docs/environment_local.yml @@ -0,0 +1,14 @@ +name: pyatoa-docs +channels: + - conda-forge +dependencies: + - sphinx + - sphinx_rtd_theme + - ipykernel + - nbsphinx + - jupyter + - myst-parser + - pip + - pip: + - sphinx-autoapi + - sphinx_rtd_theme diff --git a/docs/ex_w_mpi.rst b/docs/ex_w_mpi.rst index 1eb37889..1a274064 100644 --- a/docs/ex_w_mpi.rst +++ b/docs/ex_w_mpi.rst @@ -11,7 +11,8 @@ values for each source-receiver pair. .. warning:: This example requires `mpi4py `_ - which is **not** a dependency of Pyatoa. To install mpi4py to the Conda + and `PySEP `__ + which are **not** dependencies of Pyatoa. To install them to the Conda environment created from the `installation `_ section, you can run: @@ -19,6 +20,7 @@ values for each source-receiver pair. $ conda activate pyatoa $ conda install mpi4py + $ pip install pysep-adjtomo Example Data ------------ diff --git a/docs/index.rst b/docs/index.rst index ffa37cdb..e4bedc6e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -115,7 +115,7 @@ publication: :caption: Tutorials misfit - discovery + windows storage inspector standards diff --git a/docs/misfit.rst b/docs/misfit.rst index b22778ae..483eb560 100644 --- a/docs/misfit.rst +++ b/docs/misfit.rst @@ -56,10 +56,6 @@ parameters.
 iteration: None step_count: None event_id: None - GATHER - start_pad: 20 - end_pad: 500 - save_to_ds: True PROCESS min_period: 10.0 max_period: 30.0 diff --git a/docs/windows.rst b/docs/windows.rst new file mode 100644 index 00000000..abd47da0 --- /dev/null +++ b/docs/windows.rst @@ -0,0 +1,360 @@ +Windowing Parameters +==================== + +`Pyflex `__ is a Python port of the FLEXWIN +algorithm for automatically selecting windows between data and synthetic +waveforms for seismic tomography. + +Below are a number of preset values for Pyflex which have been used in various +tomography studies. They are provided as reference starting points for Users to +determine the parameters required for their own studies (a sketch showing how +to apply a preset follows the Default preset below). + +See the `Pyflex Config class `__ +for detailed descriptions of parameters. Descriptions of a few commonly used +but non-intuitive parameters are below: + + - ``stalta_waterlevel``: reject windows where the short-term average over + long-term average (STA/LTA) waveform dips below this threshold value. + Values between 0 and 1 + - ``c_0``: STA/LTA water level; reject if + **window.stalta.min() < c_0 * stalta_waterlevel** + - ``c_1``: minimum acceptable window length; windows shorter than + **c_1 * T_min** are rejected (e.g., with **T_min** = 10 s and + **c_1** = 1.5, windows shorter than 15 s are rejected) + - ``c_2``: STA/LTA prominence rejection; reject if + **window.stalta.min() < c_2 * window.stalta.max()** + - ``c_3a``: Separation height in phase separation; reject if + **d_stalta > c_3a * d_stalta_center * f_time**, + where *d_stalta* is the current max height above min, + *d_stalta_center* is the central max height above min, and + *f_time* is the time decay function + - ``c_3b``: Separation height in phase separation, where *d_time* is the + separation between the center of the window and the internal maxima; + if **d_time > c_3b**, *f_time* is a time decay function, else it is 1 + - ``c_4a``: Curtail emergent signals, with + **time_decay_left = T_min * c_4a / dt** + - ``c_4b``: Curtail coda waves and decaying signals, with + **time_decay_right = T_min * c_4b / dt** + +Default +~~~~~~~ +The default windowing parameters set in ``pyflex.core.config.Config()``. + +.. note:: + + All remaining presets only list the parameters that overwrite the + Default parameters shown here. + +.. code:: yaml + + stalta_waterlevel: 0.07 + tshift_acceptance_level: 10.0 + tshift_reference: 0.0 + dlna_acceptance_level: 1.3 + dlna_reference: 0.0 + cc_acceptance_level: 0.7 + s2n_limit: 1.5 + earth_model: "ak135" + min_surface_wave_velocity: 3.0 + max_time_before_first_arrival: 50.0 + c_0: 1.0 + c_1: 1.5 + c_2: 0.0 + c_3a: 4.0 + c_3b: 2.5 + c_4a: 2.0 + c_4b: 6.0 + check_global_data_quality: False + snr_integrate_base: 3.5 + snr_max_base: 3.0 + noise_start_index: 0 + noise_end_index: None + signal_start_index: None + signal_end_index: -1 + window_weight_fct: None + window_signal_to_noise_type: "amplitude" + resolution_strategy: "interval_scheduling" + +-----------------------
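With the ``pyflex_preset`` shortcut removed in v0.3.0, a preset from this page is applied by passing its values to ``Config`` as a plain dictionary. A minimal sketch, with values copied from the Default preset above (per the v0.3.0 changes, misspelled keys raise a TypeError):

.. code:: python

    # Sketch: copy-paste a preset from this page into Pyatoa's Config.
    # Dictionary keys must be valid pyflex.Config argument names.
    from pyatoa import Config

    preset = {"stalta_waterlevel": 0.07, "cc_acceptance_level": 0.7,
              "s2n_limit": 1.5}
    cfg = Config(min_period=10, max_period=30, pyflex_parameters=preset)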
+ + +Maggi et al. (2009) +~~~~~~~~~~~~~~~~~~~ +The following windowing parameters are defined for various regions and scales +in Table 3 of the original FLEXWIN publication, +`Maggi et al. (2009) `__. + +Global (50--150s) +````````````````` + +.. code:: yaml + + s2n_limit: 2.5 + stalta_waterlevel: 0.08 + cc_acceptance_level: 0.85 + tshift_acceptance_level: 15.0 + dlna_acceptance_level: 1. + tshift_reference: 0. + dlna_reference: 0. + c_0: 0.7 + c_1: 4. + c_2: 0.3 + c_3a: 1.0 + c_3b: 2.0 + c_4a: 3. + c_4b: 10.0 + +Japan (6--30s) +````````````````` + +.. code:: yaml + + s2n_limit: 3. + stalta_waterlevel: 0.12 + cc_acceptance_level: 0.73 + tshift_acceptance_level: 3.0 + dlna_acceptance_level: 1.5 + tshift_reference: 0. + dlna_reference: 0. + c_0: 0.7 + c_1: 3. + c_2: 0.6 + c_3a: 1.0 + c_3b: 2.0 + c_4a: 3. + c_4b: 12.0 + +Southern California (6--30s) +`````````````````````````````` + +.. code:: yaml + + s2n_limit: 3. + stalta_waterlevel: 0.18 + cc_acceptance_level: 0.71 + tshift_acceptance_level: 8.0 + dlna_acceptance_level: 1.5 + tshift_reference: 4. + dlna_reference: 0. + c_0: 0.7 + c_1: 2. + c_2: 0. + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + +Southern California (3--30s) +```````````````````````````` + +.. code:: yaml + + s2n_limit: 4. + stalta_waterlevel: 0.11 + cc_acceptance_level: 0.8 + tshift_acceptance_level: 4.0 + dlna_acceptance_level: 1. + tshift_reference: 2. + dlna_reference: 0. + c_0: 1.3 + c_1: 4. + c_2: 0. + c_3a: 4.0 + c_3b: 2.5 + c_4a: 2. + c_4b: 6.0 + +Southern California (2--30s) +```````````````````````````` + +.. code:: yaml + + s2n_limit: 4. + stalta_waterlevel: 0.07 + cc_acceptance_level: 0.85 + tshift_acceptance_level: 3.0 + dlna_acceptance_level: 1. + tshift_reference: 1. + dlna_reference: 0. + c_0: 1. + c_1: 5. + c_2: 0. + c_3a: 4.0 + c_3b: 2.5 + c_4a: 2. + c_4b: 6.0 + +------------------------------------ + +Chow et al. (2022) +~~~~~~~~~~~~~~~~~~ +The following parameter sets were used to derive `NZATOM_NORTH `__, +an adjoint tomography model for the North Island of New Zealand. The results of this study +are published in `Chow et al. (2022a) `__. + +The parameter set is split into various inversion legs which tackle different +passbands of the dataset. + +Leg 1 (15--30s) +``````````````````````` +.. code:: yaml + + stalta_waterlevel: 0.08 + tshift_acceptance_level: 12.0 + dlna_acceptance_level: 2.5 + cc_acceptance_level: 0.7 + s2n_limit: 2.5 + max_time_before_first_arrival: 10. + min_surface_wave_velocity: 1.2 + check_global_data_quality: True + c_0: 0.7 + c_1: 2.0 + c_3a: 1.0 + c_3b: 2.0 + c_4a: 3.0 + c_4b: 10.0 + +Leg 2 (10--30s) +``````````````````````` +.. code:: yaml + + stalta_waterlevel: 0.10 + tshift_acceptance_level: 8.0 # based on sign-flip + dlna_acceptance_level: 2.0 + cc_acceptance_level: 0.7 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.2 + check_global_data_quality: True + c_0: 0.7 + c_1: 2.0 + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + +Leg 3 (8--30s) +`````````````````````` +.. code:: yaml + + stalta_waterlevel: 0.10 + tshift_acceptance_level: 8.0 + dlna_acceptance_level: 1.5 + cc_acceptance_level: 0.7 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.1 + check_global_data_quality: True + c_0: 0.7 + c_1: 2.0 # min window = c1 * tmin = 16s + c_3a: 4.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + +Leg 4 (6--30s) +`````````````````````` +.. code:: yaml + + stalta_waterlevel: 0.08 + tshift_acceptance_level: 8. + dlna_acceptance_level: 1.5 + cc_acceptance_level: 0.60 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.05 + check_global_data_quality: True + snr_integrate_base: 3.5 # exclude noisy data + c_0: 0.8 # reject if win.stalta.min < c_0 * stalta_wl + c_1: 2.0 # min window = c1 * tmin = 12s + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + +Leg 5 (4--30s) +`````````````````````` +.. code:: yaml + + stalta_waterlevel: 0.075 + tshift_acceptance_level: 6. + dlna_acceptance_level: 1.5 + cc_acceptance_level: 0.65 + s2n_limit: 4. + max_time_before_first_arrival: 5.
+ min_surface_wave_velocity: 1.0 + check_global_data_quality: True + snr_integrate_base: 3.5 # exclude noisy data + c_0: 0.9 # reject if win.stalta.min < c_0 * stalta_wl + c_1: 3. + c_3a: 3.5 + c_3b: 2.25 + c_4a: 2.25 + c_4b: 9.0 + +Posthoc Analysis (6--30s) +`````````````````````````` +This was used for posthoc evaluation of the final model using events not +inverted for during the inversion. + +.. code:: yaml + + stalta_waterlevel: 0.08 + tshift_acceptance_level: 12. + dlna_acceptance_level: 1.5 + cc_acceptance_level: 0.60 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.05 + check_global_data_quality: True + snr_integrate_base: 3.5 # exclude noisy data + c_0: 0.8 # reject if win.stalta.min < c_0 * stalta_wl + c_1: 2.0 # min window = c1 * tmin = 12s + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + + +Ristau 1D (10--30s) +``````````````````````````` +Used for windowing synthetic waveforms generated with the 1D New Zealand +velocity model of `Ristau (2008) `__, as analyzed in +`Chow et al. (2020) `__. + +.. code:: yaml + + stalta_waterlevel: 0.10 + tshift_acceptance_level: 12.0 + dlna_acceptance_level: 2.0 + cc_acceptance_level: 0.675 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.6 + check_global_data_quality: True + c_0: 0.7 + c_1: 2.0 + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 + +Ristau 1D (8--30s) +``````````````````````````` + +.. code:: yaml + + stalta_waterlevel: 0.08 + tshift_acceptance_level: 10.0 + dlna_acceptance_level: 2.0 + cc_acceptance_level: 0.675 + s2n_limit: 3. + max_time_before_first_arrival: 5. + min_surface_wave_velocity: 1.4 + check_global_data_quality: True + c_0: 0.7 + c_1: 2.5 + c_3a: 3.0 + c_3b: 2.0 + c_4a: 2.5 + c_4b: 12.0 diff --git a/pyatoa/__init__.py b/pyatoa/__init__.py index c2733591..9b70a2bc 100644 --- a/pyatoa/__init__.py +++ b/pyatoa/__init__.py @@ -15,6 +15,5 @@ from pyatoa.core.config import Config # NOQA from pyatoa.core.manager import Manager, ManagerError # NOQA from pyatoa.core.executive import Executive # NOQA -from pyatoa.core.gatherer import Gatherer # NOQA from pyatoa.core.inspector import Inspector # NOQA diff --git a/pyatoa/core/config.py b/pyatoa/core/config.py index c7a8a00d..fa7ca943 100755 --- a/pyatoa/core/config.py +++ b/pyatoa/core/config.py @@ -2,19 +2,15 @@ """ Configuration of User-set parameters within the package. Contains external functions to set Config objects of Pyflex and Pyadjoint.
- -To Do 02.04.21: Allow config to set pyflex and pyadjoint config as a function, - currently it's obscured behind some private functions """ import yaml import numpy as np from copy import deepcopy -from pyatoa import logger from pyatoa.utils.form import format_iter, format_step -from pyatoa.plugins.pyflex_presets import pyflex_presets from pyflex import Config as PyflexConfig -from pyadjoint import get_config, ADJSRC_TYPES +from pyadjoint import get_config as get_pyadjoint_config +from pyadjoint import ADJSRC_TYPES class Config: @@ -28,11 +24,10 @@ class Config: def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, step_count=None, event_id=None, min_period=10, max_period=100, rotate_to_rtz=False, unit_output="DISP", component_list=None, - pyflex_preset="default", adj_src_type="cc_traveltime", - observed_tag="observed", synthetic_tag=None, - st_obs_type="obs", st_syn_type="syn", - win_amp_ratio=0., paths=None, save_to_ds=True, - **kwargs): + adj_src_type="cc_traveltime", observed_tag="observed", + synthetic_tag=None, st_obs_type="obs", st_syn_type="syn", + win_amp_ratio=0., pyflex_parameters=None, + pyadjoint_parameters=None): """ Initiate the Config object either from scratch, or read from external. @@ -42,15 +37,12 @@ def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, :type yaml_fid: str :param yaml_fid: id for .yaml file if config is to be loaded externally - :type seisflows_yaml: str - :param seisflows_yaml: id for the seisflows parameters.yaml file that - needs a special read function. Used in conjunction with SeisFlows. :type iteration: int :param iteration: if running an inversion, the current iteration. Used for internal path naming, as well as interaction with Seisflows via Pyaflowa. :type step_count: int - :param step: if running an inversion, the current step count in the + :param step_count: if running an inversion, the current step count in the line search, will be used for internal path naming, and interaction with Seisflows via Pyaflowa. :type event_id: str @@ -64,8 +56,6 @@ def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, :type unit_output: str :param unit_output: units of stream, to be fed into preprocessor for instrument response removal. Available: 'DISP', 'VEL', 'ACC' - :type pyflex_preset: str - :param pyflex_preset: name to map to pyflex preset config :type adj_src_type: str :param adj_src_type: method of misfit quantification for Pyadjoint :type st_obs_type: str @@ -89,20 +79,19 @@ def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, :param synthetic_tag: Tag to use for asdf dataset to label and search for obspy streams of synthetic data. Default 'synthetic_{model_num}' Tag must be formatted before use. - :type paths: dict of str - :param paths: any absolute paths for Pyatoa to search for - waveforms in. If path does not exist, it will automatically be - skipped. Allows for work on multiple machines, by giving multiple - paths for the same set of data, without needing to change config. - Waveforms must be saved in a specific directory structure with a - specific naming scheme - :type save_to_ds: bool - :param save_to_ds: allow toggling saving to the dataset when new data - is gathered/collected. This is useful, e.g. if a dataset that - contains data is passed to the Manager, but you don't want to - overwrite the data inside while you do some temporary processing. - :raises ValueError: If kwargs do not match Pyatoa, Pyflex or Pyadjoint - attribute names. 
+ :type pyflex_parameters: dict + :param pyflex_parameters: overwrite for Pyflex parameters defined + in the Pyflex.Config object. Incorrectly defined argument names + will raise a TypeError. See Pyflex docs for detailed parameter defs: + http://adjtomo.github.io/pyflex/#config-object + :type pyadjoint_parameters: dict + :param pyadjoint_parameters: overwrite for Pyadjoint parameters defined + in the Pyadjoint.Config object for the given `adj_src_type`. + Incorrectly defined argument names will raise a TypeError. See + Pyadjoint docs for detailed parameter definitions: + https://adjtomo.github.io/pyadjoint/ + :raises TypeError: If incorrect arguments provided to the underlying + Pyflex or Pyadjoint Config objects. """ self.iteration = iteration self.step_count = step_count @@ -117,29 +106,16 @@ def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, # on calling property for actual value self._synthetic_tag = synthetic_tag - self.pyflex_preset = pyflex_preset self.adj_src_type = adj_src_type self.st_obs_type = st_obs_type self.st_syn_type = st_syn_type self.win_amp_ratio = win_amp_ratio self.component_list = component_list - self.save_to_ds = save_to_ds - - # Empty init because these are filled by self._check() + # To be filled in by reading or with default parameters self.pyflex_config = None self.pyadjoint_config = None - # Make sure User provided paths are list objects - if paths: - for key in paths: - if not isinstance(paths[key], list): - paths[key] = [paths[key]] - self.paths = paths - else: - self.paths = {"waveforms": [], "synthetics": [], "responses": [], - "events": []} - # If reading from a YAML file or from a dataset, do not set the external # Configs (pyflex and pyadjoint) because these will be read in verbatim if ds or yaml_fid: @@ -152,7 +128,16 @@ def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None, # names and keyword arguments else: # Set Pyflex and Pyadjoint Config objects as attributes - self._set_external_configs(**kwargs) + pyflex_parameters = pyflex_parameters or {} + self.pyflex_config = PyflexConfig(min_period=min_period, + max_period=max_period, + **pyflex_parameters) + pyadjoint_parameters = pyadjoint_parameters or {} + # Double difference flag will be set by the adjoint source type + self.pyadjoint_config = get_pyadjoint_config( + adjsrc_type=adj_src_type, min_period=min_period, + max_period=max_period, **pyadjoint_parameters + ) # Run internal sanity checks self._check() @@ -169,14 +154,13 @@ def __str__(self): f" {'event_id:':<25}{self.event_id}\n" ) # Format the remainder of the keys identically - key_dict = {"Gather": ["save_to_ds"], - "Process": ["min_period", "max_period", "unit_output", + key_dict = {"Process": ["min_period", "max_period", "unit_output", "rotate_to_rtz", "win_amp_ratio", "st_obs_type", - "st_obs_type"], + "st_syn_type"], "Labels": ["component_list", "observed_tag", - "synthetic_tag", "paths"], - "External": ["pyflex_preset", "adj_src_type", - "pyflex_config", "pyadjoint_config" + "synthetic_tag"], + "External": ["adj_src_type", "pyflex_config", + "pyadjoint_config" ] } for key, items in key_dict.items(): @@ -261,22 +245,9 @@ def _check(self): assert(self.unit_output in acceptable_units), \ f"unit_output should be in {acceptable_units}" - # Check that paths are in the proper format, dictated by Pyatoa - required_keys = ['synthetics', 'waveforms', 'responses', 'events'] - assert(isinstance(self.paths, dict)), "paths should be a dict" - for key in self.paths.keys(): - assert(key in required_keys), \ - f"path 
keys can only be in {required_keys}" - - # Make sure that all the required keys are given in the dictionary - for key in required_keys: - if key not in self.paths.keys(): - self.paths[key] = [] - # Set the component list. Rotate component list if necessary if self.rotate_to_rtz: if not self.component_list: - logger.info("component list set to R/T/Z") self.component_list = ["R", "T", "Z"] else: for comp in ["N", "E"]: @@ -284,7 +255,6 @@ def _check(self): f"rotated component list cannot include '{comp}'" else: if not self.component_list: - logger.info("component list set to E/N/Z") self.component_list = ["E", "N", "Z"] # Check that the amplitude ratio is a reasonable number @@ -295,38 +265,6 @@ def _check(self): assert(self.adj_src_type in ADJSRC_TYPES), \ f"Pyadjoint `adj_src_type` must be in {ADJSRC_TYPES}" - def _set_external_configs(self, check_unused=False, **kwargs): - """ - Set the Pyflex and Pyadjoint Config parameters using kwargs provided - to the init function. Allows the user to request unused kwargs be - returned with an Error statement. - - :type check_unnused: bool - :param check_unnused: check if kwargs passed to Pyadjoint and Pyflex - do not match config parameters for either, which may mean - misspelled parameter names, or just kwargs that were not meant for - either - :raises ValueError: if check_unnused is True and unnused kwargs found - """ - # Set Pyflex confict through wrapper function - self.pyflex_config, unused_kwargs_pf = set_pyflex_config( - choice=self.pyflex_preset, min_period=self.min_period, - max_period=self.max_period, **kwargs - ) - - # Set Pyadjoint Config - self.pyadjoint_config, unused_kwargs_pa = set_pyadjoint_config( - adjsrc_type=self.adj_src_type, - min_period=self.min_period, max_period=self.max_period, **kwargs - ) - - if check_unused: - # See if both Pyflex and Pyadjoint threw the same unacceptable kwarg - unused_kwargs = set(unused_kwargs_pf) & set(unused_kwargs_pa) - if unused_kwargs: - raise ValueError(f"{list(unused_kwargs)} are not keyword " - f"arguments in Pyatoa, Pyflex or Pyadjoint.") - def _get_aux_path(self, default="default", separator="/"): """ Pre-formatted path to be used for tagging and identification in @@ -429,7 +367,7 @@ def read(self, read_from, path=None, fmt=None): if fmt.lower() == "yaml": try: self._read_yaml(read_from) - except ValueError: + except ValueError as e: print(f"Unknown yaml format for file {read_from}, {e}") elif fmt.lower() == "asdf": assert(path is not None), "path must be defined" @@ -530,11 +468,6 @@ def _read_yaml(self, filename): unused_kwargs = {} for key, item in attrs.items(): if hasattr(self, key.lower()): - # Special case: ensure paths don't overwrite, but append - - if key == "paths": - for cfgkey, cfgitem in self.paths.items(): - item[cfgkey] += cfgitem setattr(self, key.lower(), item) else: unused_kwargs[key.lower()] = item @@ -568,8 +501,6 @@ def _read_asdf(self, ds, path): cfgin = ds.auxiliary_data.Configs[path].parameters # Parameters from flattened dictionaries will need special treatment - paths = {"waveforms": [], "synthetics": [], "responses": [], - "events": []} pyflex_config, pyadjoint_config = {}, {} for key, item in cfgin.items(): @@ -583,10 +514,7 @@ def _read_asdf(self, ds, path): item = item.tolist() # Put the item in the correct dictionary - if "paths" in key: - # e.g. 
paths_waveforms -> waveforms - paths[key.split('_')[1]].append(item) - elif "pyflex_config" in key: + if "pyflex_config" in key: # Ensure that empties are set to NoneType pyflex_config["_".join(key.split('_')[2:])] = item elif "pyadjoint_config" in key: @@ -596,85 +524,9 @@ def _read_asdf(self, ds, path): # Normal Config attribute setattr(self, key, item) - # Assign the flattened dictionaries back into nested dictionaries - setattr(self, "paths", paths) - - pyflex_config, _ = set_pyflex_config(**pyflex_config, choice=None) - setattr(self, "pyflex_config", pyflex_config) - - pyadjoint_config, _ = set_pyadjoint_config(**pyadjoint_config) - setattr(self, "pyadjoint_config", pyadjoint_config) - - -def set_pyflex_config(min_period, max_period, choice=None, **kwargs): - """ - Overwriting the default Pyflex parameters with User-defined criteria - - :type choice: str or dict - :param choice: name of map to choose the Pyflex config options, if None, - default values are used. Kwargs can still overload default values. - Also dicts can be passed in as User-defined preset - :type min_period: float - :param min_period: min period of the data - :type max_period: float - :param max_period: max period of the data - :rtype: pyflex.Config - :return: the pyflex Config option to use when running Pyflex - """ - # Instantiate the pyflex Config object - pfconfig = PyflexConfig(min_period=min_period, max_period=max_period) - - # Set preset configuration parameters based on hard-coded presets - if isinstance(choice, str): - if choice in pyflex_presets.keys(): - preset = pyflex_presets[choice] - for key, item in preset.items(): - setattr(pfconfig, key, item) - else: - raise KeyError( - f"'{choice}' does not match any available presets for Pyflex. " - f"Presets include {list(pyflex_presets.keys())}" - ) - # Allow dictionary object to be passed in as a preset - elif isinstance(choice, dict): - for key, item in choice.items(): - setattr(pfconfig, key, item) - - # Kwargs can also be passed from the pyatoa.Config object to avoid having to - # define pre-set values. Kwargs will override preset values - unused_kwargs = [] - for key, item in kwargs.items(): - if hasattr(pfconfig, key): - setattr(pfconfig, key, item) - else: - unused_kwargs.append(key) - - return pfconfig, unused_kwargs - - -def set_pyadjoint_config(adjsrc_type, min_period, max_period, **kwargs): - """ - Set the Pyadjoint config based on Pyatoa Config parameters. - Kwargs can be fed to the Pyadjoint Config object. Returns unnused kwargs. 
- - Config parameters can be found at: - http://adjtomo.github.io/pyadjoint/autoapi/pyadjoint/config/index.html - - :type min_period: float - :param min_period: min period of the data - :type max_period: float - :param max_period: max period of the data - :rtype cfgout: pyadjoint.Config - :return cfgout: properly set pyadjoint configuration object - """ - paconfig = get_config(adjsrc_type, min_period=min_period, - max_period=max_period) - unused_kwargs = [] - for key, item in kwargs.items(): - if hasattr(paconfig, key): - setattr(paconfig, key, item) - else: - unused_kwargs.append(key) - - return paconfig, unused_kwargs + # Set Pyflex and Pyadjoint Config objects as attributes + self.pyflex_config = PyflexConfig(**pyflex_config) + # Double difference is stored but not required + pyadjoint_config.pop("double_difference") + self.pyadjoint_config = get_pyadjoint_config(**pyadjoint_config) diff --git a/pyatoa/core/gatherer.py b/pyatoa/core/gatherer.py deleted file mode 100755 index 21aab748..00000000 --- a/pyatoa/core/gatherer.py +++ /dev/null @@ -1,607 +0,0 @@ -#!/usr/bin/env python -""" -Mid and Low level data gathering classes to retrieve data from local filesystems -either via an ASDFDataSet or through a pre-defined data directory structure. - -Gatherer directly called by the Manager class and shouldn't need to be called -by the User unless for bespoke data gathering functionality. -""" -import os -import glob -import warnings - -from pyasdf import ASDFWarning -from pysep.utils.io import read_sem, read_specfem2d_source, read_forcesolution -from obspy import Stream, read, read_inventory, read_events - -from pyatoa import logger -from pyatoa.utils.form import format_event_name -from pyatoa.utils.calculate import overlapping_days -from pyatoa.utils.srcrcv import merge_inventories - - -class GathererNoDataException(Exception): - """ - Custom exception to be thrown generally to Manager class in the case that - gathering of any data fails. - """ - pass - - -class Gatherer: - """ - A mid-level data gathering class used to get data internally and externally. - All saving to ASDFDataSet taken care of by the Gatherer class. - """ - def __init__(self, config, ds=None, origintime=None): - """ - :type config: pyatoa.core.config.Config - :param config: configuration object that contains necessary parameters - to run through the Pyatoa workflow - :type ds: pyasdf.asdf_data_set.ASDFDataSet - :param ds: dataset for internal data searching and saving - """ - self.ds = ds - self.config = config - self.origintime = origintime - - def gather_event(self, event_id=None, **kwargs): - """ - Gather an ObsPy Event object by searching disk - Event info need only be retrieved once per Pyatoa workflow. - - :type event_id: str - :param event_id: a unique event idenfitier to search and tag event info - :rtype: obspy.core.event.Event - :return: event retrieved either via internal or external methods - :raises GathererNoDataException: if no event information is found. 
- """ - logger.info("gathering event QuakeML") - # Try 1: fetch from ASDFDataSet - if self.ds: - logger.debug("searching ASDFDataSet for event info") - event = self.fetch_event_from_dataset() - else: - event = None - # Try 2: look at local filesystem - if event is None: - logger.debug("searching local filesystem for QuakeML") - event = self.fetch_event_by_dir(event_id, **kwargs) - # Abort if none of the three attempts returned successfully - if event is None: - raise GathererNoDataException(f"no QuakeML found for " - f"{self.config.event_id}") - - # Overwrite origintime with the catalog value to be consistent - self.origintime = event.preferred_origin().time - logger.debug(f"event origin time is set to: {self.origintime}") - - # Save event information to dataset if necessary - if self.ds and self.config.save_to_ds: - try: - self.ds.add_quakeml(event) - logger.debug(f"event QuakeML added to ASDFDataSet") - # Trying to re-add an event to the ASDFDataSet throws ValueError - except ValueError as e: - logger.debug(f"event QuakeML was not added to ASDFDataSet: {e}") - pass - - logger.info(f"matching event found: '{format_event_name(event)}'") - return event - - def gather_station(self, code, **kwargs): - """ - Gather StationXML information from disk - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :rtype: obspy.core.inventory.Inventory - :return: inventory containing relevant network and stations - """ - logger.info(f"gathering StationXML for: {code}") - - # Try 1: fetch from ASDFDataSet - if self.ds: - logger.debug("searching ASDFDataSet for StationXML") - inv = self.fetch_inv_from_dataset(code) - else: - inv = None - # Try 2: fetch from local filesystem - if inv is None: - logger.debug("searching local filesystem for StationXML") - inv = self.fetch_inv_by_dir(code, **kwargs) - # Abort if none of the three attempts returned successfully - if inv: - logger.info(f"matching StationXML found: {code}") - if (self.ds is not None) and self.config.save_to_ds: - # !!! This is a temp fix for PyASDF 0.6.1 where re-adding - # !!! StationXML that contains comments throws a TypeError. - # !!! Issue #59 - try: - self.ds.add_stationxml(inv) - logger.debug("saved inventory to ASDFDataSet") - except TypeError: - pass - else: - raise GathererNoDataException(f"no StationXML for {code} found") - - return inv - - def gather_observed(self, code, **kwargs): - """ - Gather observed waveforms from disk as ObsPy streams - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? 
- :rtype: obspy.core.stream.Stream - :return: stream object containing relevant waveforms - """ - logger.info(f"gathering observed waveforms: {code}") - - # Try 1: fetch from ASDFDataSet - if self.ds: - logger.debug("searching ASDFDataSet for observations") - st_obs = self.fetch_waveform_from_dataset( - code=code, tag=self.config.observed_tag - ) - else: - st_obs = None - # Try 2: fetch from local file system, different approaches if we're - # looking for data or synthetic "data" - if st_obs is None: - logger.debug("searching local filesystem for observations") - if self.config.st_obs_type == "syn": - st_obs = self.fetch_synthetic_by_dir(code, - syn_cfgpath="waveforms", - **kwargs) - else: - st_obs = self.fetch_observed_by_dir(code, **kwargs) - # Abort if none of the three attempts returned successfully - if st_obs: - logger.info(f"matching observed waveforms found: {code}") - self.save_waveforms_to_dataset(st_obs, self.config.observed_tag) - else: - raise GathererNoDataException(f"no observed waveforms found " - f"for: {code}") - return st_obs - - def gather_synthetic(self, code, **kwargs): - """ - Gather synthetic waveforms as ObsPy streams. - - Only possible to check ASDFDataSet and local filesystem, cannot gather - synthetics from webservice. - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :rtype: obspy.core.stream.Stream - :return: stream object containing relevant waveforms - :raises GathererNoDataException: if no synthetic data is found - """ - logger.info(f"gathering synthetic waveforms: {code}") - - # Try 1: fetch from ASDFDataSet - if self.ds: - logger.debug("searching ASDFDataSet for synthetics") - st_syn = self.fetch_waveform_from_dataset( - code=code, tag=self.config.synthetic_tag - ) - else: - st_syn = None - # Try 2: fetch from local directories - if st_syn is None: - logger.debug("searching local filesystem for synthetics") - st_syn = self.fetch_synthetic_by_dir(code, **kwargs) - # Abort if none of the attempts returned successfully - if st_syn: - logger.debug(f"matching synthetic waveforms found: {code}") - self.save_waveforms_to_dataset(st_syn, self.config.synthetic_tag) - else: - raise GathererNoDataException(f"no synthetic waveforms found " - f"for: {code}" - ) - return st_syn - - def fetch_event_from_dataset(self): - """ - Return Event information from ASDFDataSet. - - .. note:: - Assumes that the ASDF Dataset will only contain one event, which is - dictated by the structure of Pyatoa. - - :rtype event: obspy.core.event.Event - :return event: event object - :raises AttributeError: if no event attribute found in ASDFDataSet - :raises IndexError: if event attribute found but no events - """ - try: - event = self.ds.events[0] - except (IndexError, AttributeError): - logger.debug(f"no matching event found in dataset") - event = None - return event - - def fetch_inv_from_dataset(self, code): - """ - Return StationXML from ASDFDataSet based on station code. - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. 
Example station code: NZ.OPRZ.10.HH? - :rtype: obspy.core.inventory.network.Network - :return: network containing relevant station information - :raises KeyError: if no matching StationXML found - """ - net, sta, loc, cha = code.split(".") - try: - inv = self.ds.waveforms[f"{net}_{sta}"].StationXML - inv = inv.select(channel=cha) - except (KeyError, AttributeError): - logger.debug(f"no matching inventory in dataset: {net}_{sta}") - inv = None - return inv - - def fetch_waveform_from_dataset(self, code, tag): - """ - Return waveforms as Stream objects from ASDFDataSet. - - .. note: - * Allows for wildcard selection of component (? or *) - * Selects by component because synthetic channel naming may differ - from observation channels. - * Component is assumed to be the last index in the channel, - following SEED convention. - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :type tag: str - :param tag: internal asdf tag labelling waveforms - :rtype: obspy.core.stream.Stream - :return: waveform contained in a stream, or None if no matching value - """ - net, sta, loc, cha = code.split(".") - comp = cha[-1] - try: - st = self.ds.waveforms[f"{net}_{sta}"][tag].select(component=comp) - except KeyError: - logger.debug(f"no matching waveform in dataset: {net}_{sta}_{tag}") - st = None - return st - - def fetch_event_by_dir(self, event_id, prefix="", suffix="", format_=None, - **kwargs): - """ - Fetch event information via directory structure on disk. Developed to - parse CMTSOLUTION and QUAKEML files, but theoretically accepts any - format that the ObsPy read_events() function will accept. - - Will search through all paths given until a matching source file found. - - .. note:: - This function will search for the following path - /path/to/event_dir/{prefix}{event_id}{suffix} - - so, if e.g., searching for a CMTSOLUTION file in the current dir: - ./CMTSOLUTION_{event_id} - - Wildcards are okay but the function will return the first match - - :type event_id: str - :param event_id: Unique event identifier to search source file by. - e.g., a New Zealand earthquake ID '2018p130600'. A prefix or suffix - will be tacked onto this - :rtype event: obspy.core.event.Event or None - :return event: event object if found, else None. - :type prefix: str - :param prefix Prefix to prepend to event id for file name searching. - Wildcards are okay. - :type suffix: str - :param suffix: Suffix to append to event id for file name searching. - Wildcards are okay. - :type format_: str or NoneType - :param format_: Expected format of the file to read, e.g., 'QUAKEML', - passed to ObsPy read_events. NoneType means read_events() will guess - """ - # Ensure that the paths are a list so that iterating doesnt accidentally - # try to iterate through a string. 
- paths = self.config.paths["events"] - if not isinstance(paths, list): - paths = [paths] - - event = None - for path_ in paths: - if not os.path.exists(path_): - logger.debug(f"event search path does not exist: {path_}") - continue - # Search for available event files - fid = os.path.join(path_, f"{prefix}{event_id}{suffix}") - for filepath in glob.glob(fid): - logger.debug(f"searching for event data: {filepath}") - if os.path.exists(filepath): - try: - # Allow input of various types of source files - if "SOURCE" in prefix.upper(): - logger.debug(f"found SPECFEM2D SOURCE: {filepath}") - cat = [read_specfem2d_source(filepath)] - elif "FORCESOLUTION" in prefix.upper(): - logger.debug(f"found FORCESOLUTION: {filepath}") - cat = [read_forcesolution(filepath)] - # ObsPy can handle QuakeML and CMTSOLUTION - else: - logger.debug(f"found event file: {filepath}") - cat = read_events(filepath, format=format_) - - if len(cat) != 1: - logger.warning( - f"{filepath} event file contains {len(cat)} " - f"events, returning 0th index") - event = cat[0] - break - except Exception as e: - logger.warning(f"{filepath} event file read error: {e}") - - if event is not None: - logger.info(f"retrieved local file: {filepath}") - else: - logger.debug(f"no local event file found") - - return event - - def fetch_inv_by_dir(self, code, resp_dir_template="{sta}.{net}", - resp_fid_template="RESP.{net}.{sta}.{loc}.{cha}", - **kwargs): - """ - Fetch station dataless via directory structure on disk. - Will search through all paths given until StationXML found. - - .. note:: - Default path naming follows SEED convention, that is: - path/to/dataless/{NET}.{STA}/RESP.{NET}.{STA}.{LOC}.{CHA} - e.g. path/to/dataless/NZ.BFZ/RESP.NZ.BFZ.10.HHZ - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :type resp_dir_template: str - :param resp_dir_template: Directory structure template to search for - response files. By default follows the SEED convention: - 'path/to/RESPONSE/{sta}.{net}/' - :type resp_fid_template: str - :param resp_fid_template: Response file naming template to search for - station dataless. By default, follows the SEED convention: - 'RESP.{net}.{sta}.{loc}.{cha}' - :rtype inv: obspy.core.inventory.Inventory or None - :return inv: inventory containing relevant network and stations - """ - inv = None - net, sta, loc, cha = code.split(".") - - # Ensure that the paths are a list so that iterating doesnt accidentally - # try to iterate through a string. 
- paths = self.config.paths["responses"] - if not isinstance(paths, list): - paths = [paths] - - for path_ in paths: - if not os.path.exists(path_): - logger.debug(f"StationXML search path does not exist: {path_}") - continue - # Attempting to instantiate an empty Inventory requires some - # positional arguements we dont have, so don't do that - fid = os.path.join(path_, resp_dir_template, resp_fid_template) - fid = fid.format(net=net, sta=sta, cha=cha, loc=loc) - logger.debug(f"searching for StationXML: {fid}") - - for filepath in glob.glob(fid): - if inv is None: - # The first inventory becomes the main inv to return - inv = read_inventory(filepath) - else: - # All other inventories are appended to the original - inv_append = read_inventory(filepath) - # Merge inventories to remove repeated networks - inv = merge_inventories(inv, inv_append) - logger.info(f"retrieved StationXML locally: {filepath}") - - return inv - - def fetch_observed_by_dir( - self, code, obs_dir_template="{year}/{net}/{sta}/{cha}", - obs_fid_template="{net}.{sta}.{loc}.{cha}.{year}.{jday:0>3}", - **kwargs): - """ - Fetch observation waveforms via directory structure on disk. - - .. note:: - Default waveform directory structure assumed to follow SEED - convention. That is: - path/to/data/{YEAR}/{NETWORK}/{STATION}/{CHANNEL}*/{FID} - e.g. path/to/data/2017/NZ/OPRZ/HHZ.D/NZ.OPRZ.10.HHZ.D - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :type obs_dir_template: str - :param obs_dir_template: directory structure to search for observation - data. Follows the SEED convention: - 'path/to/obs_data/{year}/{net}/{sta}/{cha}' - :type obs_fid_template: str - :param obs_fid_template: File naming template to search for observation - data. Follows the SEED convention: - '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}' - :rtype stream: obspy.core.stream.Stream or None - :return stream: stream object containing relevant waveforms, else None - """ - if self.origintime is None: - raise AttributeError("`origintime` must be specified") - - net, sta, loc, cha = code.split('.') - # If waveforms contain midnight, multiple files need to be read. - # Checks one hour before and after origintime - jdays = overlapping_days(origin_time=self.origintime, start_pad=3600, - end_pad=3600) - - # Ensure that the paths are a list so that iterating doesnt accidentally - # try to iterate through a string. 
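The midnight-crossing check noted in the docstring above (one hour of padding on either side of the origin time) can be sketched as follows; the station code and origin time are hypothetical, and the year rollover at Dec 31 is ignored for brevity:

```python
from obspy import UTCDateTime

origintime = UTCDateTime("2018-02-18T07:43:48")  # hypothetical event origin
start, end = origintime - 3600, origintime + 3600

# Julian days touched by the padded window; two entries means the request
# crosses midnight and two SEED day-files must be read
jdays = sorted({start.julday, end.julday})
fids = [f"NZ.BFZ.10.HHZ.{origintime.year}.{jday:0>3}" for jday in jdays]
```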
- paths = self.config.paths["waveforms"] - if not isinstance(paths, list): - paths = [paths] - - st = Stream() - for path_ in paths: - if not os.path.exists(path_): - logger.debug(f"waveform search path does not exist: {path_}") - continue - full_path = os.path.join(path_, obs_dir_template, obs_fid_template) - pathlist = [] - for jday in jdays: - pathlist.append(full_path.format(net=net, sta=sta, cha=cha, - loc=loc, jday=jday, - year=self.origintime.year) - ) - for fid in pathlist: - logger.debug(f"searching for observations: {fid}") - for filepath in glob.glob(fid): - st += read(filepath) - logger.info(f"retrieved observations locally: {filepath}") - break - # Take care of gaps in data by converting to masked data - if len(st) > 0: - st.merge() - - # If empty stream either due to no data or trimming removes all data, - # we will return None - if len(st) == 0: - logger.warning(f"no matching observed waveforms found: {code}") - st = None - - return st - - def fetch_synthetic_by_dir(self, code, syn_cfgpath="synthetics", - syn_unit="?", syn_dir_template="", - syn_fid_template="{net}.{sta}.*{cmp}.sem{dva}*", - **kwargs): - """ - Fetch synthetic waveforms from Specfem3D via directory structure on - disk, if necessary convert native ASCII format to Stream object. - - .. note:: - By default, synthetics will be searched for with the following path - config.paths[syn_cfgpath]/syn_dir_template/syn_fid_template.format() - - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :type syn_cfgpath: str - :param syn_cfgpath: Config.paths key to search for synthetic data. - Defaults to 'synthetics', but for the may need to be set to - 'waveforms' in certain use-cases. - :type syn_unit: str - :param syn_unit: Optional argument to specify the letter used to - identify the units of the synthetic data: For Specfem3D: - ["d", "v", "a", "?"] 'd' for displacement, 'v' for velocity, - 'a' for acceleration. Wildcards okay. Defaults to '?' - :type syn_dir_template: str - :param syn_dir_template: Directory structure template to search for - synthetic waveforms. Defaults to empty string - :type syn_fid_template: str - :param syn_fid_template: The naming template of synthetic waveforms - defaults to "{net}.{sta}.*{cmp}.sem{syn_unit}" - :rtype stream: obspy.core.stream.Stream or None - :return stream: stream object containing relevant waveforms - """ - if self.origintime is None: - raise AttributeError("`origintime` must be specified") - - # Generate information necessary to search for data - net, sta, loc, cha = code.split(".") - - # Ensure that the paths are a list so that iterating doesnt accidentally - # try to iterate through a string. 
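The ASCII-or-miniseed fallback used just below is a generally useful pattern for SPECFEM output; a sketch with hypothetical file names (read_sem comes from PySEP, as elsewhere in this changeset):

```python
from obspy import UTCDateTime, read
from pysep.utils.io import read_sem

origintime = UTCDateTime("2018-02-18T07:43:48")  # hypothetical
filename = "NZ.BFZ.BXZ.semd"  # hypothetical SPECFEM ASCII synthetic

try:
    # Two-column SPECFEM ASCII output is converted to an ObsPy Stream
    st = read_sem(filename, origintime=origintime)
except UnicodeDecodeError:
    # Binary files (e.g., already-converted miniseed) fall back to read()
    st = read(filename)
```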
- paths = self.config.paths[syn_cfgpath] - if not isinstance(paths, list): - paths = [paths] - - st = Stream() - for path_ in paths: - if not os.path.exists(path_): - logger.debug(f"synthetic search path does not exist: {path_}") - continue - # Expand the full path for searching for synthetics - full_path = os.path.join(path_, syn_dir_template, syn_fid_template) - filenames = glob.glob(full_path.format(net=net, sta=sta, - cmp=cha[2:], - dva=syn_unit.lower()) - ) - logger.debug(f"found {len(filenames)} synthetics: {full_path}") - if filenames: - for filename in filenames: - try: - # Convert the ASCII file to a miniseed - st += read_sem(filename, self.origintime) - except UnicodeDecodeError: - # If the data file is already in miniseed - st += read(filename) - logger.info(f"retrieved synthetics locally: {filename}") - else: - continue - # Take care of gaps in data by converting to masked data - if len(st) > 0: - st.merge() - # If empty stream either due to no data or trimming removes all data, - # we will return None - if len(st) == 0: - logger.warning(f"no matching synthetic data found: {code}") - st = None - - return st - - def save_waveforms_to_dataset(self, st, tag): - """ - Save waveformsm to the ASDFDataSet with a simple check for existence - of dataset and save parameter. Passes if waveforms already exist while - ignoring the PyASDF warning that gets thrown if waveforms exist. - - :type st: obspy.core.stream.Stream - :param st: Stream object to be saved into the dataset - :type tag: str - :param tag: unique identifier to save the waveforms under - """ - if (self.ds is not None) and self.config.save_to_ds: - # Catch ASDFWarning that occurs when data already exists - with warnings.catch_warnings(): - warnings.filterwarnings("error") - try: - self.ds.add_waveforms(waveform=st, tag=tag) - logger.debug(f"saved waveform to ASDFDataSet as: '{tag}'") - except ASDFWarning: - pass diff --git a/pyatoa/core/inspector.py b/pyatoa/core/inspector.py index ed8e79e4..99be114b 100755 --- a/pyatoa/core/inspector.py +++ b/pyatoa/core/inspector.py @@ -255,11 +255,17 @@ def _get_srcrcv_from_dataset(self, ds): """ # Create a dataframe with source information, ignore duplicates event_id = format_event_name(ds.events[0]) + # Some events, like FORCESOLUTIONS, do not contain information on magni. 
+        try:
+            magnitude = ds.events[0].preferred_magnitude().mag
+        except AttributeError:
+            magnitude = None
+
         if event_id not in self.sources.index:
             src = {
                 "event_id": format_event_name(ds.events[0]),
                 "time": str(ds.events[0].preferred_origin().time),
-                "magnitude": ds.events[0].preferred_magnitude().mag,
+                "magnitude": magnitude,
                 "depth_km": ds.events[0].preferred_origin().depth * 1E-3,
                 "latitude": ds.events[0].preferred_origin().latitude,
                 "longitude": ds.events[0].preferred_origin().longitude,
@@ -862,7 +868,8 @@ def stats(self, level="event", choice="mean", key=None, iteration=None,
         """
         group_list = ["iteration", "step", level]

-        df = getattr(self.windows.groupby(group_list), choice)()
+        df = getattr(self.windows.groupby(group_list), choice)(
+            numeric_only=True)
         if iteration is not None:
             df = df.loc[iteration]
         if step_count is not None:
diff --git a/pyatoa/core/manager.py b/pyatoa/core/manager.py
index 26bcfff4..9c3d9d6f 100755
--- a/pyatoa/core/manager.py
+++ b/pyatoa/core/manager.py
@@ -10,20 +10,24 @@
 import pyflex
 import pyadjoint
 import warnings
-from pysep.utils.fmt import channel_code
+
 from copy import deepcopy
 from obspy.signal.filter import envelope
 from pyasdf import ASDFWarning
+
 from pyatoa import logger
 from pyatoa.core.config import Config
-from pyatoa.core.gatherer import Gatherer, GathererNoDataException
-from pyatoa.utils.process import is_preprocessed
+from pyatoa.utils.asdf.add import add_misfit_windows, add_adjoint_sources
+
 from pyatoa.utils.asdf.load import load_windows, load_adjsrcs
-from pyatoa.utils.window import reject_on_global_amplitude_ratio
+from pyatoa.utils.form import channel_code
+
+from pyatoa.utils.process import (apply_filter, trim_streams, zero_pad,
+                                  match_npts, normalize, stf_convolve,
+                                  is_preprocessed)
 from pyatoa.utils.srcrcv import gcd_and_baz
-from pyatoa.utils.asdf.add import add_misfit_windows, add_adjoint_sources
-from pyatoa.utils.process import (default_process, trim_streams, zero_pad,
-                                  match_npts)
+from pyatoa.utils.window import reject_on_global_amplitude_ratio
+
 from pyatoa.scripts.load_example_data import load_example_data
 from pyatoa.visuals.wave_maker import WaveMaker

@@ -76,6 +80,7 @@ def reset(self):
         """Convenience function to reset stats to None"""
         self.__init__()

+
 class Manager:
     """
     Pyatoas core workflow object.
@@ -87,13 +92,15 @@ class Manager:
     """
     def __init__(self, config=None, ds=None, event=None, st_obs=None,
                  st_syn=None, inv=None, windows=None, staltas=None,
-                 adjsrcs=None, gcd=None, baz=None, gatherer=None):
+                 adjsrcs=None, gcd=None, baz=None):
         """
         Initiate the Manager class with or without pre-defined attributes.

         .. note::
-            If `ds` is not given in data can only be gathered via the
-            config.paths attribute. Data will also not be saved.
+
+            If `ds` is not given, data can only be provided through init
+            or by passing them directly to the Manager. Data will also not
+            be saved.

         :type config: pyatoa.core.config.Config
         :param config: configuration object that contains necessary parameters
@@ -118,9 +125,6 @@ def __init__(self, config=None, ds=None, event=None, st_obs=None,
         :param gcd: great circle distance between source and receiver in km
         :type baz: float
         :param baz: Backazimuth between source and receiver in units of degrees
-        :type gatherer: pyatoa.core.gatherer.Gatherer
-        :param gatherer: A previously instantiated Gatherer class.
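As an aside, the `numeric_only=True` argument added to the Inspector.stats() aggregation above guards against newer pandas, which raises instead of silently dropping non-numeric columns; a toy illustration (column names hypothetical):

```python
import pandas as pd

df = pd.DataFrame({"iteration": ["i01", "i01"],
                   "channel": ["HHZ", "HHN"],   # non-numeric column
                   "cc_shift_in_seconds": [1.2, 0.8]})

# Recent pandas raises a TypeError when asked to average the string
# column, so the aggregation is restricted to numeric columns
print(df.groupby("iteration").mean(numeric_only=True))
```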
-            Should not have to be passed in by User, but is used for reset()
         """
         self.ds = ds
         self.inv = inv
@@ -144,13 +148,6 @@ def __init__(self, config=None, ds=None, event=None, st_obs=None,
         else:
             origintime = None

-        # Instantiate a Gatherer object and pass along info
-        if gatherer is None:
-            self.gatherer = Gatherer(config=self.config, ds=self.ds,
-                                     origintime=origintime)
-        else:
-            self.gatherer = gatherer
-
         # Copy Streams to avoid affecting original data
         if st_obs is not None:
             self.st_obs = st_obs.copy()
@@ -291,20 +288,33 @@ def check(self):
     def reset(self):
         """
         Restart workflow by deleting all collected data in the Manager, but
-        retain dataset, event, config, and gatherer so a new station can be
+        retain dataset, event, and config so a new station can be
        processed with the same configuration as the previous workflow.
        """
-        self.__init__(ds=self.ds, event=self.event, config=self.config,
-                      gatherer=self.gatherer)
+        self.__init__(ds=self.ds, event=self.event, config=self.config)

-    def write(self, ds=None):
+    def write_to_dataset(self, ds=None, choice=None):
         """
-        Write the data collected inside Manager to an ASDFDataSet,
+        Write the data collected inside Manager to an ASDFDataSet

         :type ds: pyasdf.asdf_data_set.ASDFDataSet or None
         :param ds: write to a given ASDFDataSet. If None, will look for
             internal attribute `self.ds` to write to. Allows overwriting to
             new datasets
+        :type choice: list or None
+        :param choice: choose which internal attributes to write, by default
+            writes all of the following:
+            'event': Event attribute as a QuakeML
+            'inv': Inventory attribute as a StationXML
+            'st_obs': Observed waveform under tag `config.observed_tag`
+            'st_syn': Synthetic waveform under tag `config.synthetic_tag`
+            'windows': Misfit windows collected by Pyflex are stored under
+            `auxiliary_data.MisfitWindow`
+            'adjsrcs': Adjoint sources created by Pyadjoint are stored under
+            `auxiliary_data.AdjointSources`
+            'config': the Pyatoa Config object is stored under
+            'auxiliary_data.Config' and can be used to re-load the Manager
+            and re-do processing
         """
         # Allow using both default and input datasets for writing.
Check if we # can actually write to the dataset @@ -317,18 +327,22 @@ def write(self, ds=None): logger.warning("dataset opened in read-only mode, cannot write") return - if self.event: + if choice is None: + choice = ["event", "inv", "st_obs", "st_syn", "windows", "adjsrcs", + "config"] + + if self.event and "event" in choice: try: ds.add_quakeml(self.event) except ValueError: logger.debug("Event already present, not added") - if self.inv: + if self.inv and "inv" in choice: try: ds.add_stationxml(self.inv) except TypeError: logger.debug("StationXML already present, not added") # Redirect PyASDF 'waveforms already present' warnings for cleaner look - if self.st_obs: + if self.st_obs and "st_obs" in choice: with warnings.catch_warnings(): warnings.filterwarnings("error") try: @@ -338,7 +352,7 @@ def write(self, ds=None): logger.debug(f"{self.config.observed_tag} waveform already " f"present, not added") pass - if self.st_syn: + if self.st_syn and "st_syn" in choice: with warnings.catch_warnings(): warnings.filterwarnings("error") try: @@ -348,10 +362,23 @@ def write(self, ds=None): logger.debug(f"{self.config.synthetic_tag} waveform " f"already present, not added") pass - if self.windows: - self.save_windows(ds=ds, force=True) - if self.adjsrcs: - self.save_adjsrcs(ds=ds, force=True) + if self.windows and "windows" in choice: + logger.debug("saving misfit windows to ASDFDataSet") + add_misfit_windows(self.windows, ds, path=self.config.aux_path) + if self.adjsrcs and "adjsrcs" in choice: + logger.debug("saving adjoint sources to ASDFDataSet") + add_adjoint_sources(adjsrcs=self.adjsrcs, ds=ds, + path=self.config.aux_path, + time_offset=self.stats.time_offset_sec) + + if self.config and "config" in choice: + with warnings.catch_warnings(): + warnings.filterwarnings("error") + try: + self.config.write(write_to=ds, fmt="asdf") + except ASDFWarning: + logger.debug(f"config already present, not added") + pass def write_adjsrcs(self, path="./", write_blanks=True): """ @@ -485,45 +512,86 @@ def load(self, code=None, path=None, ds=None, synthetic_tag=None, self.check() return self - def flow(self, **kwargs): + def flow(self, standardize_to="syn", fix_windows=False, iteration=None, + step_count=None, **kwargs): """ A convenience function to run the full workflow with a single command. Does not include gathering. Takes kwargs related to all underlying functions. + .. code:: python + mgmt = Manager() mgmt.flow() == mgmt.standardize().preprocess().window().measure() + + :type standardize_to: str + :param standardize_to: choice of 'obs' or 'syn' to use one of the time + series to standardize (resample, trim etc.) the other. + :type fix_windows: bool + :param fix_windows: if True, will attempt to retrieve saved windows from + an ASDFDataSet under the `iteration` and `step_count` tags to use + during misfit quantification rather than measuring new windows + :type iteration: int or str + :param iteration: if 'fix_windows' is True, look for windows in this + iteration. If None, will check the latest iteration/step_count + in the given dataset + :type step_count: int or str + :param step_count: if 'fix_windows' is True, look for windows in this + step_count. 
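Taken together with the explicit flow() signature being documented here, a typical v0.3.0 session looks like the following sketch; the dataset name is hypothetical and `st_obs`, `st_syn`, `inv` are assumed to be pre-gathered ObsPy objects (e.g., PySEP-written SAC files read back in with ObsPy):

```python
from pyasdf import ASDFDataSet
from pyatoa import Config, Manager

# st_obs, st_syn and inv are assumed to be pre-gathered ObsPy objects
ds = ASDFDataSet("2018p130600.h5")  # hypothetical dataset name
mgmt = Manager(config=Config(iteration=1, step_count=0), ds=ds,
               st_obs=st_obs, st_syn=st_syn, inv=inv)
mgmt.flow()

# Saving is now explicit; flow() no longer writes mid-workflow
mgmt.write_to_dataset(choice=["windows", "adjsrcs", "config"])
```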
If None, will check the latest iteration/step_count
+            in the given dataset
         :raises ManagerError: for any controlled exceptions
         """
-        force = kwargs.get("force", False)
-        standardize_to = kwargs.get("standardize_to", "syn")
-        fix_windows = kwargs.get("fix_windows", False)
-        iteration = kwargs.get("iteration", None)
-        step_count = kwargs.get("step_count", None)
-        overwrite = kwargs.get("overwrite", None)
-        which = kwargs.get("which", "both")
-        save = kwargs.get("save", True)
-
-        self.standardize(standardize_to=standardize_to, force=force)
-        self.preprocess(overwrite=overwrite, which=which, **kwargs)
+        if fix_windows:
+            assert(self.ds is not None), \
+                f"`fix_windows` requires ASDFDataSet `ds`"
+            assert(iteration is not None and step_count is not None), (
+                f"`fix_windows` requires 'iteration' and 'step_count' to "
+                f"access windows from ASDFDataSet"
+            )
+
+        self.standardize(standardize_to=standardize_to)
+        self.preprocess(**kwargs)
         self.window(fix_windows=fix_windows, iteration=iteration,
-                    step_count=step_count, force=force, save=save)
-        self.measure(force=force, save=save)
+                    step_count=step_count)
+        self.measure()

-    def flow_multiband(self, periods, plot=False, **kwargs):
+    def flow_multiband(self, periods, standardize_to="syn", fix_windows=False,
+                       iteration=None, step_count=None, plot=False, **kwargs):
         """
         Run the full workflow for a number of distinct period bands, returning
         a final set of adjoint sources generated as a summation of adjoint
-        sources from each of these period bands.
+        sources from each of these period bands. Allows for re-using windows
+        collected from the first set of period bands to evaluate adjoint
+        sources from the remaining period bands.
+
+        .. note::
+
+            Kwargs are passed through to the Manager.preprocess() function only

-        .. rubric::
-            manager.flow_multiband(periods=[(1, 5), (10, 30), (40, 100)])
+        .. rubric:: Basic Usage
+
+            Manager.flow_multiband(periods=[(1, 5), (10, 30), (40, 100)])

         :type periods: list of tuples
         :param periods: a list of tuples that define multiple period bands to
             generate windows and adjoint sources for. Overwrites the Config's
             internal `min_period` and `max_period` parameters. The final
             adjoint source will be a summation of all adjoint sources
             generated.
+        :type standardize_to: str
+        :param standardize_to: choice of 'obs' or 'syn' to use one of the time
+            series to standardize (resample, trim etc.) the other.
+        :type fix_windows: bool
+        :param fix_windows: if True, will attempt to retrieve saved windows
+            from an ASDFDataSet under the `iteration` and `step_count` tags to
+            use during misfit quantification rather than measuring new windows
+        :type iteration: int or str
+        :param iteration: if 'fix_windows' is True, look for windows in this
+            iteration. If None, will check the latest iteration/step_count
+            in the given dataset
+        :type step_count: int or str
+        :param step_count: if 'fix_windows' is True, look for windows in this
+            step_count.
If None, will check the latest iteration/step_count
+            in the given dataset
         :type plot: str
         :param plot: name of figure if given, will plot waveform and map for
             each period band and append period band to figure name `plot`
@@ -532,46 +600,42 @@ def flow_multiband(self, periods, plot=False, **kwargs):
             measurements from each of the period bands
         :raises ManagerError: for any controlled exceptions
         """
-        force = kwargs.get("force", False)
-        standardize_to = kwargs.get("standardize_to", "syn")
-        fix_windows = kwargs.get("fix_windows", False)
-        iteration = kwargs.get("iteration", None)
-        step_count = kwargs.get("step_count", None)
-        overwrite = kwargs.get("overwrite", None)
-        which = kwargs.get("which", "both")
-        save = kwargs.get("save", True)
-
         # Copy these waveforms to overwrite for each new period band
         st_obs_raw = self.st_obs.copy()
         st_syn_raw = self.st_syn.copy()

+        # Do standardization and preprocessing once up front
+        self.check()
+        self.standardize(standardize_to=standardize_to)
+        self.preprocess(**kwargs)
+
         tags = []
-        windows, adjsrcs = {}, {}
+        multiband_adjsrcs, multiband_windows = {}, {}
         for period in periods:
-            tag = f"{period[0]}_{period[1]}"  # e.g., 5_10
+            tag = f"{period[0]}-{period[1]}"  # e.g., '5-10'
             tags.append(tag)
             logger.info(f"calculating adjoint source for period band {tag}s")
             self.config.min_period, self.config.max_period = period

             # Standard flow()
             try:
                 self.check()
-                self.standardize(standardize_to=standardize_to, force=force)
-                self.preprocess(overwrite=overwrite, which=which, **kwargs)
+                self.standardize(standardize_to=standardize_to)
+                self.preprocess(**kwargs)
                 self.window(fix_windows=fix_windows, iteration=iteration,
-                            step_count=step_count, force=force, save=save)
-                self.measure(force=force, save=save)
+                            step_count=step_count)
+                self.measure()

                 if plot:
                     save = f"{plot}_{tag}.png"
                     self.plot(choice="both", save=save)
             except ManagerError as e:
                 logger.warning(f"period band {tag}s encountered error {e}, "
                                f"cannot return adjoint source for {tag}s")

             # Save results of the processing step
-            adjsrcs[tag] = self.adjsrcs
-            windows[tag] = self.windows
+            multiband_adjsrcs[tag] = self.adjsrcs or None
+            multiband_windows[tag] = self.windows or None

             # Reset for the next run. Don't do a full reset because that gets
             # rid of metadata too, which we need for response removal
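After the loop above collects per-band results, the adjoint sources are averaged component-wise (see the combination routine further down); the arithmetic reduces to a sum over bands divided by the band count, as in this toy sketch:

```python
import numpy as np

# Hypothetical adjoint-source arrays for one component, one per band
bands = {"10-30": np.ones(100), "15-30": 3 * np.ones(100)}

# flow_multiband() sums contributions and divides by the band count
averaged = np.sum(list(bands.values()), axis=0) / len(bands)
assert averaged[0] == 2.0
```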
- - For valid kwargs see methods in :doc:`core.gatherer` + # Average all adjoint sources into a single object and collect windows + self.windows, self.adjsrcs = \ + self._combine_mutliband_results(multiband_windows, + multiband_adjsrcs) - :type code: str - :param code: Station code following SEED naming convention. - This must be in the form NN.SSSS.LL.CCC (N=network, S=station, - L=location, C=channel). Allows for wildcard naming. By default - the pyatoa workflow wants three orthogonal components in the N/E/Z - coordinate system. Example station code: NZ.OPRZ.10.HH? - :type choice: list - :param choice: allows user to gather individual bits of data, rather - than gathering all. Allowed: 'inv', 'st_obs', 'st_syn' - :raises ManagerError: if any part of the gathering fails. - - Keyword Arguments - :: - bool try_fm: - Try to retrieve and append focal mechanism information to the - Event object. - str prefix: - Prefix for event id when searching for event information, - can be used to search ordered files e.g., CMTSOLUTION_001 - str suffix: - Suffix for event id when searching for event information - str station_level: - The level of the station metadata if retrieved using the ObsPy - Client. Defaults to 'response' - str resp_dir_template: - Directory structure template to search for response files. - By default follows the SEED convention: - 'path/to/RESPONSE/{sta}.{net}/' - str resp_fid_template: - Response file naming template to search for station dataless. - By default, follows the SEED convention - 'RESP.{net}.{sta}.{loc}.{cha}' - str obs_dir_template: - directory structure to search for observation data. Follows the - SEED convention: 'path/to/obs_data/{year}/{net}/{sta}/{cha}' - str obs_fid_template: - File naming template to search for observation data. Follows the - SEED convention: '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}' - str syn_cfgpath: - Config.cfgpaths key to search for synthetic data. Defaults to - 'synthetics', but for the may need to be set to 'waveforms' in - certain use-cases, e.g. synthetics-synthetic inversions. - str syn_unit: - Optional argument to specify the letter used to identify the - units of the synthetic data: For Specfem3D: ["d", "v", "a", "?"] - 'd' for displacement, 'v' for velocity, 'a' for acceleration. - Wildcards okay. Defaults to '?' - str syn_dir_template: - Directory structure template to search for synthetic waveforms. - Defaults to empty string - str syn_fid_template: - The naming template of synthetic waveforms defaults to: - "{net}.{sta}.*{cmp}.sem{syn_unit}" + def _combine_mutliband_results(self, windows, adjsrcs): """ - # Default to gathering all data - if choice is None: - choice = ["event", "inv", "st_obs", "st_syn"] - try: - # Attempt to gather event information before waveforms/metadata - if "event" in choice and self.event is None: - if event_id is None: - event_id = self.config.event_id - self.event = self.gatherer.gather_event(event_id, **kwargs) - if code is not None: - logger.info(f"gathering data for {code}") - if "st_obs" in choice: - # Ensure observed waveforms gathered before synthetics and - # metadata. 
If this fails, no point to gathering the rest - self.st_obs = self.gatherer.gather_observed(code, **kwargs) - if "st_syn" in choice: - self.st_syn = self.gatherer.gather_synthetic(code, **kwargs) - # Allow StationXML gathering to fail, e.g., with synthetic data - # we will not have/need response informatino - if "inv" in choice: - try: - self.inv = self.gatherer.gather_station(code, **kwargs) - except GathererNoDataException as e: - logger.warning(f"Could not find matching StationXML " - f"for {code}, setting 'inv' attribute " - f"to None") - self.inv = None + Function flow_multiband() generates multiple sets of adjoint sources + for a variety of period bands, however the User is only interested in a + single adjoint source which is the average of all of these adjoint + sources. + + This function will take the multiple sets of adjoint sources and sum + them accordingly, returning a single set of AdjointSource objects which + can be used the same as any `adjsrc` attribute returned from `measure`. + + :type adjsrcs: dict of dicts + :param adjsrcs: a collection of dictionaries whose keys are the + period band set in `flow_multiband(periods)` and whose values are + dictionaries returned in `Manager.adjsrcs` from `Manager.measure()` + :rtype: (dict of Windows, dict of AdjointSource) + :return: a dictionary of Windows, and AdjointSource objects for each + component in the componet list. Adjoint sources and misfits + are the average of all input `adjsrcs` for the given `periods` range + """ + adjsrcs_out = {} + windows_out = {} + for comp in self.config.component_list: + n = 0 + windows_out[comp] = [] + for tag, adjsrc_dict in adjsrcs.items(): + # Sometimes adjoint sources are empty for a given period range + # which likely means windows are also empty + if not adjsrc_dict: + continue + + adjsrc = adjsrc_dict[comp] + # Set a template adjoint source whose attrs. 
that will change
+            if comp not in adjsrcs_out:
+                adjsrcs_out[comp] = deepcopy(adjsrc)
+                adjsrcs_out[comp].min_period = 1E6  # very large number
+                adjsrcs_out[comp].max_period = 0
+                adjsrcs_out[comp].misfit = 0
+                adjsrcs_out[comp].window_stats = []
+                adjsrcs_out[comp].windows = []
+                adjsrcs_out[comp].adjoint_source = adjsrc.adjoint_source * 0
+
+            # Windows are easy, simply append Windows objects to a list
+            try:
+                windows_out[comp] += windows[tag][comp]
+            except TypeError:
+                pass

-            return self
-        except GathererNoDataException as e:
-            # Catch the Gatherer exception and redirect as ManagerError
-            # so that it can be caught by flow()
-            logger.warning(e, exc_info=False)
-            raise ManagerError(
-                f"Data Gatherer could not find some data: {e}") from e
-        except Exception as e:
-            # Gathering should be robust, but if something slips through, dont
-            # let it kill a workflow, display and raise ManagerError
-            logger.warning(e, exc_info=True)
-            raise ManagerError(
-                f"Uncontrolled error in data gathering: {e}") from e
-
-    def standardize(self, force=False, standardize_to="syn"):
+            # Set internal attributes as collections of other attributes
+            adjsrcs_out[comp].min_period = min(adjsrc.min_period,
+                                               adjsrcs_out[comp].min_period)
+            adjsrcs_out[comp].max_period = max(adjsrc.max_period,
+                                               adjsrcs_out[comp].max_period)
+            adjsrcs_out[comp].misfit += adjsrc.misfit
+            adjsrcs_out[comp].windows += adjsrc.windows
+            adjsrcs_out[comp].window_stats += adjsrc.window_stats
+            adjsrcs_out[comp].adjoint_source += adjsrc.adjoint_source
+            n += 1
+        # Normalize based on the number of input adjoint sources
+        adjsrcs_out[comp].misfit /= n
+        adjsrcs_out[comp].adjoint_source /= n
+
+        return windows_out, adjsrcs_out
+
+    def standardize(self, force=False, standardize_to="syn", normalize_to=None):
         """
         Standardize the observed and synthetic traces in place.
         Ensures Streams have the same starttime, endtime, sampling rate, npts.
@@ -712,7 +726,13 @@
         :param standardize_to: allows User to set which Stream conforms to which
             by default the Observed traces should conform to the Synthetic
             ones because exports to Specfem should be controlled by the Synthetic
-            sampling rate, npts, etc.
+            sampling rate, npts, etc. Choices are 'obs' and 'syn'.
+        :type normalize_to: str
+        :param normalize_to: allow for normalizing the amplitudes of the two
+            traces.
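Since the default scheme is now spelled out inside preprocess() (reproduced below), Users who skip it can recreate the same steps with plain ObsPy calls; a sketch on ObsPy's bundled example stream, with hypothetical corner periods:

```python
from obspy import read

# ObsPy's bundled example stream stands in for `st_obs`; corner periods
# are hypothetical
st = read()
min_period, max_period = 10, 30

# Detrend and taper, filter, then detrend and taper again, mirroring the
# default scheme that preprocess() now applies in place
st.detrend("simple")
st.detrend("demean")
st.taper(0.05)
st.filter("bandpass", freqmin=1 / max_period, freqmax=1 / min_period,
          corners=2, zerophase=True)
st.detrend("simple")
st.detrend("demean")
st.taper(0.05)
```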
Choices are: + 'obs': normalize synthetic waveforms to the max amplitude of obs + 'syn': normalize observed waveform to the max amplitude of syn + 'one': normalize both waveforms so that their max amplitude is 1 """ self.check() if not self.stats.len_obs or not self.stats.len_syn: @@ -727,11 +747,13 @@ def standardize(self, force=False, standardize_to="syn"): if dt_st > 0: self.st_obs = zero_pad(self.st_obs, dt_st, before=True, after=False) - # Match sampling rates - if standardize_to == "syn": - self.st_obs.resample(self.st_syn[0].stats.sampling_rate) - else: - self.st_syn.resample(self.st_obs[0].stats.sampling_rate) + # Match sampling rates only if they differ + if self.st_syn[0].stats.sampling_rate != \ + self.st_obs[0].stats.sampling_rate: + if standardize_to == "syn": + self.st_obs.resample(self.st_syn[0].stats.sampling_rate) + else: + self.st_syn.resample(self.st_obs[0].stats.sampling_rate) # Match start and endtimes self.st_obs, self.st_syn = trim_streams( @@ -745,6 +767,14 @@ def standardize(self, force=False, standardize_to="syn"): force={"obs": "a", "syn": "b"}[standardize_to] ) + # Allow normalization of waveform amplitudes to one another or to + # a given value + if normalize_to is not None: + self.st_obs, self.st_syn = normalize( + st_a=self.st_obs, st_b=self.st_syn, + choice={"obs": "a", "syn": "b", "one": "one"}[normalize_to] + ) + # Determine if synthetics start before the origintime if hasattr(self.st_syn[0].stats, "time_offset"): self.stats.time_offset_sec = self.st_syn[0].stats.time_offset @@ -767,84 +797,117 @@ def standardize(self, force=False, standardize_to="syn"): return self - def preprocess(self, which="both", overwrite=None, **kwargs): + def preprocess(self, which="both", filter_=True, corners=2, + remove_response=False, taper_percentage=0.05, zerophase=True, + normalize_to=None, convolve_with_stf=True, + half_duration=None, **kwargs): """ - Preprocess observed and synthetic waveforms in place. - Default preprocessing tasks: Remove response (observed), rotate, filter, - convolve with source time function (synthetic). + Apply a simple, default preprocessing scheme to observed and synthetic + waveforms in place. - .. note:: - Default preprocessing can be overwritten using a - user-defined function that takes Manager and choice as inputs - and outputs an ObsPy Stream object. + Default preprocessing tasks: Remove response (optional), + rotate (optional), filter, convolve with source time function (optional) - .. note:: - Documented kwargs only apply to default preprocessing. + User is free to skip this step and perform their own preprocessing on + `Manager.st_obs` and `Manager.st_syn` if they require their own unique + processing workflow. :type which: str :param which: "obs", "syn" or "both" to choose which stream to process - defaults to both - :type overwrite: function - :param overwrite: If a function is provided, it will overwrite the - standard preprocessing function. All arguments that are given - to the standard preprocessing function will be passed as kwargs to - the new function. This allows for customized preprocessing - - Keyword Arguments - :: - int water_level: - water level for response removal - float taper_percentage: - amount to taper ends of waveform - bool remove_response: - remove instrument response using the Manager's inventory object. - Defaults to True - bool apply_filter: - filter the waveforms using the Config's min_period and - max_period parameters. 
Defaults to True
-            bool convolve_with_stf:
-                Convolve synthetic data with a Gaussian source time function if
-                a half duration is provided.
-            bool rotate_to_rtz:
-                Use the `rotate_baz` variable to rotate streams
-                from ZNE components to RTZ
+            defaults to "both"
+        :type filter_: bool
+        :param filter_: filter data using Config.min_period and Config.max_period
+            with `corners` filter corners. Apply tapers and demeans before
+            and after application of filter.
+        :type taper_percentage: float
+        :param taper_percentage: percentage [0, 1] of taper to apply to head and
+            tail of the data before and after preprocessing
+        :type corners: int
+        :param corners: number of filter corners to apply if `filter_`==True
+        :type zerophase: bool
+        :param zerophase: apply a zerophase filter (True) or not (False).
+            Zerophase filters are run back and forth meaning no phase shift
+            is applied, but more waveform distortion may be present.
+        :type remove_response: bool
+        :param remove_response: flag, remove instrument response from
+            'obs' type data using the provided `inv`. Defaults to False.
+            Kwargs are passed directly to the ObsPy `remove_response`
+            function. See ObsPy docs for available options.
+        :type convolve_with_stf: bool
+        :param convolve_with_stf: flag, convolve 'syn' type data with a Gaussian
+            source time function to mimic a finite source. Used when the
+            half duration in seismic simulations is set to 0.
+            Defaults to True and relies on parameter `half_duration`
+        :type half_duration: float
+        :param half_duration: Source time function half duration in units of
+            seconds. Only used if `convolve_with_stf`==True
+        :type normalize_to: str
+        :param normalize_to: allow for normalizing the amplitudes of the two
+            traces. Choices are:
+            'obs': normalize synthetic waveforms to the max amplitude of obs
+            'syn': normalize observed waveform to the max amplitude of syn
+            'one': normalize both waveforms so that their max amplitude is 1
         """
-        if overwrite:
-            assert(hasattr(overwrite, '__call__')), "overwrite must be function"
-            preproc_fx = overwrite
+        if which.lower() == "obs":
+            preproc_list = {"obs": self.st_obs}
+        elif which.lower() == "syn":
+            preproc_list = {"syn": self.st_syn}
         else:
-            preproc_fx = default_process
-
-        # Preprocess observation waveforms
-        if self.st_obs is not None and not self.stats.obs_processed and \
-                which.lower() in ["obs", "both"]:
-            logger.info(f"preprocess `st_obs` as '{self.config.st_obs_type}'")
-            self.st_obs = preproc_fx(
-                st=self.st_obs, choice=self.config.st_obs_type,
-                inv=self.inv, baz=self.baz,
-                **{**vars(self.config), **kwargs}
-            )
-            self.stats.obs_processed = True
-
-        # Preprocess synthetic waveforms
-        if self.st_syn is not None and not self.stats.syn_processed and \
-                which.lower() in ["syn", "both"]:
-            logger.info(f"preprocess `st_syn` as '{self.config.st_syn_type}'")
-            self.st_syn = preproc_fx(
-                st=self.st_syn, choice=self.config.st_syn_type,
-                inv=self.inv, baz=self.baz,
-                **{**vars(self.config), **kwargs}
+            preproc_list = {"obs": self.st_obs, "syn": self.st_syn}
+
+        # Apply preprocessing in-place on streams
+        for key, st in preproc_list.items():
+            # Remove response from 'obs' type data only
+            if remove_response:
+                if (key == "obs" and self.config.st_obs_type == "obs") or (
+                        key == "syn" and self.config.st_syn_type == "obs"):
+                    st.remove_response(inventory=self.inv, plot=False, **kwargs)
+
+            # Detrend and taper pre-filter
+            st.detrend("simple")
+            st.detrend("demean")
+            st.taper(taper_percentage)
+
+            if self.config.rotate_to_rtz and self.baz is not None:
+
logger.info(f"rotate {key} NE->RT by {self.baz} degrees") + st.rotate(method="NE->RT", back_azimuth=self.baz) + + if filter_ and (self.config.min_period or self.config.max_period): + logger.info(f"filtering {key} {self.config.min_period}--" + f"{self.config.max_period}") + apply_filter(st=st, min_period=self.config.min_period, + max_period=self.config.max_period, + corners=corners, zerophase=zerophase + ) + # Detrend and taper post filter + st.detrend("simple") + st.detrend("demean") + st.taper(taper_percentage) + + # Convolve waveform with source time function for `syn` type data + if convolve_with_stf and half_duration: + if (key == "obs" and self.config.st_obs_type == "syn") or ( + key == "syn" and self.config.st_syn_type == "syn"): + stf_convolve(st=st, half_duration=half_duration) + + # Allow normalization of waveform amplitudes to one another or to + # a given value + if normalize_to is not None: + self.st_obs, self.st_syn = normalize( + st_a=self.st_obs, st_b=self.st_syn, + choice={"obs": "a", "syn": "b", "one": "one"}[normalize_to] ) - self.stats.syn_processed = True - # Set stats + # Set stats post preprocessing + self.stats.obs_processed = is_preprocessed(self.st_obs) + self.stats.syn_processed = is_preprocessed(self.st_syn) self.stats.len_obs = len(self.st_obs) self.stats.len_syn = len(self.st_syn) return self def window(self, fix_windows=False, iteration=None, step_count=None, - force=False, save=True): + force=False): """ Evaluate misfit windows using Pyflex. Save windows to ASDFDataSet. Allows previously defined windows to be retrieved from ASDFDataSet. @@ -868,16 +931,10 @@ def window(self, fix_windows=False, iteration=None, step_count=None, :type force: bool :param force: ignore flag checks and run function, useful if e.g. external preprocessing is used that doesn't meet flag criteria - :type save: bool - :param save: save the gathered windows to an ASDF Dataset """ # Pre-check to see if data has already been standardized self.check() - if self.config.pyflex_preset is None: - logger.info("pyflex preset is set to 'None', will not window") - return - if not self.stats.standardized and not force: raise ManagerError("cannot window, waveforms not standardized") @@ -914,8 +971,6 @@ def window(self, fix_windows=False, iteration=None, step_count=None, else: self.select_windows_plus() - if save: - self.save_windows() logger.info(f"{self.stats.nwin} window(s) total found") return self @@ -989,7 +1044,7 @@ def select_windows_plus(self): waveforms, and running select_windows() again, but for now we just raise a ManagerError and allow processing to continue """ - logger.info(f"windowing w/ map: {self.config.pyflex_preset}") + logger.info(f"windowing waveforms with Pyflex") nwin, window_dict, reject_dict = 0, {}, {} for comp in self.config.component_list: @@ -1038,7 +1093,7 @@ def select_windows_plus(self): self.rejwins = reject_dict self.stats.nwin = nwin - def measure(self, force=False, save=True): + def measure(self, force=False): """ Measure misfit and calculate adjoint sources using PyAdjoint. @@ -1050,6 +1105,7 @@ def measure(self, force=False, save=True): Saves resultant dictionary to a pyasdf dataset if given. .. note:: + Pyadjoint returns an unscaled misfit value for an entire set of windows. To return a "total misfit" value as defined by Tape (2010) Eq. 6, the total summed misfit will need to be scaled by @@ -1058,8 +1114,6 @@ def measure(self, force=False, save=True): :type force: bool :param force: ignore flag checks and run function, useful if e.g. 
external preprocessing is used that doesn't meet flag criteria - :type save: bool - :param save: save adjoint sources to ASDFDataSet """ self.check() @@ -1100,8 +1154,6 @@ def measure(self, force=False, save=True): # Save adjoint source internally and to dataset self.adjsrcs = adjoint_sources - if save: - self.save_adjsrcs() # Run check to get total misfit self.check() @@ -1109,71 +1161,11 @@ def measure(self, force=False, save=True): return self - def save_windows(self, ds=None, force=False): - """ - Convenience function to save collected misfit windows into an - ASDFDataSet with some preliminary checks - - Auxiliary data tag is hardcoded as 'MisfitWindows' - - :type ds: pyasdf.ASDFDataSet - :param ds: allow replacement of the internal `ds` dataset. If None, - will try to write to internal `ds` - :type force: bool - :param force: force saving windows even if Config says don't do it. - This is used by write() to bypass the default 'dont save' behavior - """ - if ds is None: - ds = self.ds - - if ds is None: - logger.debug("no ASDFDataSet, will not save windows") - elif not self.windows: - logger.debug("Manager has no windows to save") - elif not self.config.save_to_ds and not force: - logger.debug("config parameter `save_to_ds` is set False, " - "will not save windows") - else: - logger.debug("saving misfit windows to ASDFDataSet") - add_misfit_windows(self.windows, ds, path=self.config.aux_path) - - def save_adjsrcs(self, ds=None, force=False): - """ - Convenience function to save collected adjoint sources into an - ASDFDataSet with some preliminary checks - - Auxiliary data tag is hardcoded as 'AdjointSources' - - :type ds: pyasdf.ASDFDataSet - :param ds: allow replacement of the internal `ds` dataset. If None, - will try to write to internal `ds` - :type force: bool - :param force: force saving windows even if Config says don't do it. - This is used by write() to bypass the default 'dont save' behavior - """ - if ds is None: - ds = self.ds - - if ds is None: - logger.debug("no ASDFDataSet, cannot save adjoint sources") - elif not self.adjsrcs: - logger.debug("Manager has no adjoint sources to save") - elif not self.config.save_to_ds and not force: - logger.debug("config parameter `save_to_ds` is set False, " - "will not save adjoint sources") - else: - logger.debug("saving adjoint sources to ASDFDataSet") - add_adjoint_sources(adjsrcs=self.adjsrcs, ds=ds, - path=self.config.aux_path, - time_offset=self.stats.time_offset_sec) - def _format_windows(self): """ - .. note:: - In `pyadjoint.calculate_adjoint_source`, the window needs to be a - list of lists, with each list containing the - [left_window, right_window]; each window argument should be given in - units of time (seconds). This is not in the PyAdjoint docs. + In `pyadjoint.calculate_adjoint_source`, the window needs to be a + list of lists, with each list containing the [left_window, right_window] + Each window argument should be given in units of time (seconds). 
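Concretely, the conversion described above scales Pyflex's sample indices by the sampling interval; a sketch assuming a Manager `mgmt` that has already run window():

```python
# Hypothetical Pyflex window taken from Manager.windows after window()
win = mgmt.windows["Z"][0]

# pyadjoint expects per-component lists of [left, right] in seconds,
# so sample indices are scaled by the sampling interval `dt`
adjoint_windows = {"Z": [[win.left * win.dt, win.right * win.dt]]}
```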
:rtype: dict of list of lists :return: dictionary with key related to individual components, @@ -1256,7 +1248,6 @@ def plot(self, choice="both", save=None, show=True, corners=None, plt.close() # Plot waveform and map on the same figure elif choice == "both": - if figsize is None: figsize = (1400 / dpi, 600 / dpi) @@ -1281,3 +1272,7 @@ def plot(self, choice="both", save=None, show=True, corners=None, plt.show() else: plt.close() + + # One final shutdown of all figures just incase + plt.close("all") + diff --git a/pyatoa/plugins/__init__.py b/pyatoa/plugins/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pyatoa/plugins/pyflex_presets.py b/pyatoa/plugins/pyflex_presets.py deleted file mode 100644 index 99ac48ff..00000000 --- a/pyatoa/plugins/pyflex_presets.py +++ /dev/null @@ -1,378 +0,0 @@ -""" -Pyflex requires configuration objects to set the number of available variables -that tune the Flexwin algorithm. - -This file contains some preset maps for Pyflex that are mostly taken directly -from the Flexwin publication Maggi et al. 2009. - -For variable descriptions see: - https://krischer.github.io/pyflex/_modules/pyflex/config.html - -+ Descriptions of a few commonly used parameters that are not self explanatory - - 1. Short Term Average Long Term Average water level - :stalta_waterlevel (float): reject windows where sta/lta waveform dips - below this threshold value. between 0 and 1 - 2. Water level rejection - :c_0: reject if window.stalta.min() < c_0 * stalta_waterlevel - :c_1: min_acceptable_window_length = c_1 * T_min - 3. Prominence rejection - :c_2: reject if window.stalta.min() < c_2 * window.stalta.max() - 4. Separation height in phase separation - :c_3a: d_stlta > c_3a * d_stalta_center * f_time - where d_stalta = current max height above min - and d_stalta_center = central max height above min - and f_time = time decay function - :c_3b: d_time = separation between center of window and internal maxima - if d_time > c_3b then f_time is a time decay function, else its 1 - - if c_3b goes down, - 5. Emergent start/stops and coda wave curtailing - :c_4a: time_decay_left = T_min * c_4a / dt - :c_4b: time_decay_right: T_min * c_4b / dt -""" - -pyflex_presets = { - # Empty preset or 'default' will use default init values - "default": { - }, - # Example configuration from the Pyflex website - "example": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 15.0, - "dlna_acceptance_level": 1.0, - "cc_acceptance_level": 0.8, - "c_0": 0.7, - "c_1": 4.0, - "c_3a": 1.0, - "c_3b": 2.0, - "c_4a": 3.0, - "c_4b": 10.0 - }, - # For use with testing the workflow using a homogeneous halfspace example - "homogeneous_halfspace": { - "stalta_waterlevel": 0.05, - "tshift_acceptance_level": 15.0, - "dlna_acceptance_level": 2.0, - "s2n_limit": 3., - "min_surface_wave_velocity": 1.5, - "c_0": 0.7, - "c_1": 2., - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # NZNORTH: From the New Zealand group doing regional North Island studies - # These are the main parameters used in Chow et al. 
(2020) - "nznorth_10-30s_chow_et_al": { - "stalta_waterlevel": 0.10, - "tshift_acceptance_level": 8.0, # based on sign-flip - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.7, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.2, # Default is 3.0, chow et al.==1.4 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # North Island study area, 15-30s bandpass, for long period start waveforms - "nznorth_15-30s": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 12.0, - "dlna_acceptance_level": 2.5, - "cc_acceptance_level": 0.7, - "s2n_limit": 2.5, - "max_time_before_first_arrival": 10., - "min_surface_wave_velocity": 1.2, - "check_global_data_quality": True, - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 1.0, - "c_3b": 2.0, - "c_4a": 3.0, - "c_4b": 10.0 - }, - # North Island study area, 10-30s bandpass. Tested on Forest inversion. - "nznorth_10-30s": { - "stalta_waterlevel": 0.10, - "tshift_acceptance_level": 10.0, # based on sign-flip - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.7, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.2, # Default is 3.0, chow et al.==1.4 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # North Island study area, 8-30s bandpass. Tested on Forest inversion. - "nznorth_8-30s": { - "stalta_waterlevel": 0.10, - "tshift_acceptance_level": 8.0, - "dlna_acceptance_level": 1.5, - "cc_acceptance_level": 0.7, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.1, - "check_global_data_quality": True, - "c_0": 0.7, - "c_1": 2.0, # min window = c1 * tmin = 16s - "c_3a": 4.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # North Island study area, 6-30s bandpass - "nznorth_6-30s": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 8., - "dlna_acceptance_level": 1.5, - "cc_acceptance_level": 0.60, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.05, - "check_global_data_quality": True, - "snr_integrate_base": 3.5, # exclude noisy data - "c_0": 0.8, # reject if win.stalta.min < c_0 * stalta_wl - "c_1": 2.0, # min window = c1 * tmin = 12s - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # North Island study area, 4-30s bandpass - "nznorth_4-30s": { - "stalta_waterlevel": 0.075, - "tshift_acceptance_level": 6., - "dlna_acceptance_level": 1.5, - "cc_acceptance_level": 0.65, - "s2n_limit": 4., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.0, - "check_global_data_quality": True, - "snr_integrate_base": 3.5, # exclude noisy data - "c_0": 0.9, # reject if win.stalta.min < c_0 * stalta_wl - "c_1": 3., - "c_3a": 3.5, - "c_3b": 2.25, - "c_4a": 2.25, - "c_4b": 9.0 - }, - # North Island study area, 3-30s bandpass - # !!! 
NOT YET TESTED, LOOSELY BASED ON 'socal_3-30s' - "nznorth_3-30s": { - "stalta_waterlevel": 0.069, - "tshift_acceptance_level": 5.0, - "dlna_acceptance_level": 1., - "cc_acceptance_level": 0.675, - "s2n_limit": 4., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.0, - "check_global_data_quality": True, - "snr_integrate_base": 3.5, - "c_0": .85, # reject if win.stalta.min < c_0 * stalta_wl - "c_1": 2.66, # min window = c1 * tmin - "c_3a": 4.0, - "c_3b": 2.5, - "c_4a": 2., - "c_4b": 6.0 - }, - # North Island study area, 6-30s bandpass post-hoc waveform improvement - # analysis. Looser windowing parameters than the 6-30s preset, to try to - # get a more diverse look at the dataset rather than choosing only the - # good sections which is what we wanted to do in the inversion - "nznorth_6-30s_posthoc": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 12., - "dlna_acceptance_level": 1.5, - "cc_acceptance_level": 0.60, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.05, - "check_global_data_quality": True, - "snr_integrate_base": 3.5, # exclude noisy data - "c_0": 0.8, # reject if win.stalta.min < c_0 * stalta_wl - "c_1": 2.0, # min window = c1 * tmin = 12s - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # For the 1D velocity model of Ristau (2008) - "nznorth_1D": { - "stalta_waterlevel": 0.07, - "tshift_acceptance_level": 10.0, - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.7, - "tshift_reference": 4., - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.7, # Default is 3.0 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2., - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # For the inversion of the 1D North Island velocity model of Ristau (2008) - "nzni1D_10-30s": { - "stalta_waterlevel": 0.10, - "tshift_acceptance_level": 12.0, # based on sign-flip - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.675, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.6, # Default is 3.0, chow et al.==1.4 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # For the inversion of the 1D North Island velocity model of Ristau (2008) - # looser bounds for wider window selection - "nzni1D_10-30s_loose": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 12.0, # based on sign-flip - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.65, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.6, # Default is 3.0, chow et al.==1.4 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # 1D North Island inversion 8-30s - "nzni1D_8-30s": { - "stalta_waterlevel": 0.08, - "tshift_acceptance_level": 10.0, - "dlna_acceptance_level": 2.0, - "cc_acceptance_level": 0.675, - "s2n_limit": 3., - "max_time_before_first_arrival": 5., - "min_surface_wave_velocity": 1.4, # Default is 3.0, chow et al.==1.4 - "check_global_data_quality": True, # Default is False - "c_0": 0.7, - "c_1": 2.5, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # Global scale from Maggi et al. 
2009 Table 3 for 50s < T < 150s - "global": { - "s2n_limit": 2.5, - "stalta_waterlevel": 0.08, - "cc_acceptance_level": 0.85, - "tshift_acceptance_level": 15.0, - "dlna_acceptance_level": 1., - "tshift_reference": 0., - "dlna_reference": 0., - "c_0": 0.7, - "c_1": 4., - "c_2": 0.3, - "c_3a": 1.0, - "c_3b": 2.0, - "c_4a": 3., - "c_4b": 10.0 - }, - # Japan scale from Maggi et al. 2009 Table 3 for 6 < T < 30s - "japan_6-30s": { - "s2n_limit": 3., - "stalta_waterlevel": 0.12, - "cc_acceptance_level": 0.73, - "tshift_acceptance_level": 3.0, - "dlna_acceptance_level": 1.5, - "tshift_reference": 0., - "dlna_reference": 0., - "c_0": 0.7, - "c_1": 3., - "c_2": 0.6, - "c_3a": 1.0, - "c_3b": 2.0, - "c_4a": 3., - "c_4b": 12.0 - }, - # Southern California scale from Maggi et al. 2009 Table 3 for 6 < T < 30s - "socal_6-30s": { - "s2n_limit": 3., - "stalta_waterlevel": 0.18, - "cc_acceptance_level": 0.71, - "tshift_acceptance_level": 8.0, - "dlna_acceptance_level": 1.5, - "tshift_reference": 4., - "dlna_reference": 0., - "c_0": 0.7, - "c_1": 2., - "c_2": 0., - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, - # Southern California scale from Maggi et al. 2009 Table 3 for 3 < T < 30s - "socal_3-30s": { - "s2n_limit": 4., - "stalta_waterlevel": 0.11, - "cc_acceptance_level": 0.8, - "tshift_acceptance_level": 4.0, - "dlna_acceptance_level": 1., - "tshift_reference": 2., - "dlna_reference": 0., - "c_0": 1.3, - "c_1": 4., - "c_2": 0., - "c_3a": 4.0, - "c_3b": 2.5, - "c_4a": 2., - "c_4b": 6.0 - }, - # Southern California scale from Maggi et al. 2009 Table 3 for 2 < T < 30s - "socal_2-30s": { - "s2n_limit": 4., - "stalta_waterlevel": 0.07, - "cc_acceptance_level": 0.85, - "tshift_acceptance_level": 3.0, - "dlna_acceptance_level": 1., - "tshift_reference": 1., - "dlna_reference": 0., - "c_0": 1., - "c_1": 5., - "c_2": 0., - "c_3a": 4.0, - "c_3b": 2.5, - "c_4a": 2., - "c_4b": 6.0 - }, - # From the UAF group doing regional studies of Alaska - "alaska": { - "stalta_waterlevel": 0.18, - "tshift_acceptance_level": 4.0, - "dlna_acceptance_level": 1.5, - "cc_acceptance_level": 0.71, - "c_0": 0.7, - "c_1": 2.0, - "c_3a": 3.0, - "c_3b": 2.0, - "c_4a": 2.5, - "c_4b": 12.0 - }, -} diff --git a/pyatoa/scripts/process_data_w_mpi.py b/pyatoa/scripts/process_data_w_mpi.py index 286677eb..e471e005 100755 --- a/pyatoa/scripts/process_data_w_mpi.py +++ b/pyatoa/scripts/process_data_w_mpi.py @@ -16,8 +16,8 @@ .. rubric:: usage - # To run this on 4 cores - mpirun -n 4 process_data_w_mpi.py +# To run this on 4 cores +mpirun -n 4 process_data_w_mpi.py """ import os import numpy as np diff --git a/pyatoa/scripts/syn_syn_example.py b/pyatoa/scripts/syn_syn_example.py new file mode 100644 index 00000000..559dcaf4 --- /dev/null +++ b/pyatoa/scripts/syn_syn_example.py @@ -0,0 +1,59 @@ +""" +Pyatoa Example: Synthetic-Synthetic misfit quantification + +Generate adjoint sources for pairs of synthetic data generated from two +different models using SPECFEM 2D/3D/3D_GLOBE. + +.. 
note::
+
+    It is expected that the User has already generated their synthetic data
+    using SPECFEM and that the same STATIONS list was used
+
+https://pyatoa.readthedocs.io/syn-syn.html
+"""
+import os
+from obspy import UTCDateTime
+from pysep.utils.io import read_sem
+from pyatoa import Config, Manager
+
+
+# Determine where the synthetics are stored
+path_syn = "./SGF/AAS000000"
+path_obs = "./EGF/AAS000000"
+path_out = "./SEM"
+
+if not os.path.exists(path_out):
+    os.makedirs(path_out)
+
+# Now we initiate Pyatoa Config object which controls processing
+pyflex_cfg = {"stalta_waterlevel": 0.05,
+              "tshift_acceptance_level": 25,
+              "cc_acceptance_level": 0.6
+              }
+cfg = Config(min_period=10, max_period=200, component_list=["Y"],
+             pyflex_preset="default", adj_src_type="multitaper",
+             st_obs_type="syn", st_syn_type="syn", **pyflex_cfg
+             )
+# Used for determining the time offset T0 of the synthetics
+dummy_origintime = UTCDateTime("2000-01-01T00:00:00")
+
+# Loop through all synthetics and process
+for fid_syn, fid_obs in zip(sorted(os.listdir(path_syn)),
+                            sorted(os.listdir(path_obs))):
+
+    st_syn = read_sem(os.path.join(path_syn, fid_syn),
+                      origintime=dummy_origintime)
+    st_obs = read_sem(os.path.join(path_obs, fid_obs),
+                      origintime=dummy_origintime)
+
+    # Provide Manager with Config and data, let it do the rest
+    mgmt = Manager(config=cfg, st_obs=st_obs, st_syn=st_syn)
+    mgmt.flow()
+    mgmt.plot(show=False, save=os.path.join(path_out, "figures",
+                                            f"{st_obs[0].get_id()}.png"))
+
+    # Output adjoint sources
+    mgmt.stats.time_offset_sec = st_obs[0].stats.starttime - dummy_origintime
+    mgmt.write_adjsrcs(path=path_out, write_blanks=True)
diff --git a/pyatoa/tests/test_asdf_utils.py b/pyatoa/tests/test_asdf_utils.py
index d7258505..8d1b3f9c 100644
--- a/pyatoa/tests/test_asdf_utils.py
+++ b/pyatoa/tests/test_asdf_utils.py
@@ -7,8 +7,8 @@
 import pytest
 from obspy import read, read_events, read_inventory
 from pyasdf import ASDFDataSet
-from pyatoa import Config, Manager, logger
-from pyatoa.utils.asdf import (add, clean, load, write)
+from pyatoa import Config, Manager
+from pyatoa.utils.asdf import (add, clean, load)


 @pytest.fixture
@@ -75,7 +75,7 @@ def config():
     """
     Default Pyatoa Config object
     """
-    return Config(event_id="2018p130600", client="GEONET")
+    return Config(event_id="2018p130600")


 @pytest.fixture
@@ -93,7 +93,7 @@ def mgmt_post(mgmt_pre):
     A manager that has completed the full workflow
     """
     mgmt_pre.standardize()
-    mgmt_pre.preprocess()
+    mgmt_pre.preprocess(remove_response=True, output="DISP")
     mgmt_pre.window()
     mgmt_pre.measure()

@@ -155,32 +155,29 @@ def test_add_adjoint_sources(empty_dataset, mgmt_post):
     assert(adjsrc.misfit == misfit_check)


-def test_clean_dataset(empty_dataset, mgmt_pre):
+def test_clean_dataset(empty_dataset, mgmt_post):
     """
     Test dataset clean functions.
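The updated `mgmt_post` fixture above forwards its kwargs to ObsPy's remove_response(); outside of Pyatoa the equivalent call is sketched below, with hypothetical file names:

```python
from obspy import read, read_inventory

st = read("waveform.mseed")          # hypothetical observed data
inv = read_inventory("station.xml")  # hypothetical matching StationXML

# preprocess(remove_response=True, output="DISP") forwards `output` (and
# any other kwargs) straight to ObsPy's remove_response()
st.remove_response(inventory=inv, output="DISP")
```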
     """
     Test dataset clean functions. Need to perform tasks on a dataset we
     create here, otherwise we may permanently affect test data if we use a
     pre-built ASDFDataSet
-
-    :return:
     """
     assert(not hasattr(empty_dataset.auxiliary_data, "MisfitWindows"))
 
-    mgmt_pre.ds = empty_dataset
-    mgmt_pre.flow()
+    mgmt_post.write_to_dataset(ds=empty_dataset)
 
     assert(hasattr(empty_dataset.auxiliary_data, "MisfitWindows"))
 
     clean.clean_dataset(empty_dataset, fix_windows=False)
 
     assert(not hasattr(empty_dataset.auxiliary_data, "MisfitWindows"))
 
 
-def test_clean_dataset_fix_windows(empty_dataset, mgmt_pre):
+def test_clean_dataset_fix_windows(empty_dataset, mgmt_post):
     """
     Test cleaning a dataset but retaining windows
 
     :return:
     """
     assert(not hasattr(empty_dataset.auxiliary_data, "MisfitWindows"))
 
-    mgmt_pre.ds = empty_dataset
-    mgmt_pre.flow()
+    mgmt_post.write_to_dataset(ds=empty_dataset)
 
     assert(hasattr(empty_dataset.auxiliary_data, "MisfitWindows"))
 
     clean.clean_dataset(empty_dataset, fix_windows=True)
@@ -200,20 +197,16 @@ def test_load_windows(dataset):
     assert(windows["N"][0].max_cc_value == check_val)
 
 
-def test_load_previous_windows(empty_dataset, mgmt_pre):
+def test_load_previous_windows(empty_dataset, mgmt_post):
     """
     Test the function that returns windows in the Pyflex output format
     from an ASDFDataSet
-
-    :param dataset:
-    :return:
     """
     # Add two sets of windows to the dataset
     for iter_ in ["i01", "i02"]:
-        mgmt_pre.ds = empty_dataset
-        mgmt_pre.config.iteration = iter_
-        mgmt_pre.config.step_count = "s00"
-        mgmt_pre.flow()
+        mgmt_post.config.iteration = iter_
+        mgmt_post.config.step_count = "s00"
+        mgmt_post.write_to_dataset(ds=empty_dataset)
 
     windows = empty_dataset.auxiliary_data.MisfitWindows
     check = load.previous_windows(windows, iteration="i02", step_count="s00")
diff --git a/pyatoa/tests/test_config.py b/pyatoa/tests/test_config.py
index 9123b34c..7836efd0 100644
--- a/pyatoa/tests/test_config.py
+++ b/pyatoa/tests/test_config.py
@@ -15,19 +15,20 @@ def test_io_asdf(tmpdir):
     this functionality is called, that it doesn't lose any information in the
     conversion process.
     """
-    test_path = "this/is/a/test/path"
     test_value = 9999.
     # Set some ridiculous values to check upon re-reading
-    cfg = Config(paths={"waveforms": test_path}, c_0=test_value,
-                 dt_sigma_min=test_value)
+    cfg = Config(pyflex_parameters={'c_0': test_value},
+                 pyadjoint_parameters={'dt_sigma_min': test_value}
+                 )
+
+    ds = ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5"))
+    cfg.write(write_to=ds)
+    cfg_check = Config(ds=ds, path="default")
+    assert cfg_check.pyflex_config.c_0 == test_value
+    assert cfg_check.pyadjoint_config.dt_sigma_min == test_value
 
-    with ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5")) as ds:
-        cfg.write(write_to=ds)
-        cfg_check = Config(ds=ds, path="default")
-        assert cfg_check.paths["waveforms"] == [test_path]
-        assert cfg_check.pyflex_config.c_0 == test_value
-        assert cfg_check.pyadjoint_config.dt_sigma_min == test_value
+    del ds
 
 
 def test_io_yaml(tmpdir):
@@ -38,13 +39,13 @@ def test_io_yaml(tmpdir):
     test_value = 9999.
 
     # Set some ridiculous values to check upon re-reading
-    cfg = Config(paths={"waveforms": test_path}, c_0=test_value,
-                 dt_sigma_min=test_value)
+    cfg = Config(pyflex_parameters={'c_0': test_value},
+                 pyadjoint_parameters={'dt_sigma_min': test_value}
+                 )
     cfg.write(os.path.join(tmpdir, "test_config.yaml"))
 
     # Ensure ridiculous values read back in
     cfg_check = Config(yaml_fid=os.path.join(tmpdir, "test_config.yaml"))
-    assert cfg_check.paths["waveforms"] == [test_path]
     assert cfg_check.pyflex_config.c_0 == test_value
     assert cfg_check.pyadjoint_config.dt_sigma_min == test_value
 
@@ -88,18 +89,9 @@ def test_incorrect_parameter_check():
     # Try to set unaccetapable parameter intputs
     incorrect_data = {"unit_output": "DISPLACEMENT",
                       "synthetic_unit": "ACCELERATION",
-                      "cfgpaths": [],
-                      "cfgpaths": {"wave"},
-                      "win_amp_ratio": 1.5
                       }
     with pytest.raises(AssertionError):
         for key, value in incorrect_data.items():
             cfg = Config()
             setattr(cfg, key, value)
             cfg._check()
-
-    # unused key word arguments should result in ValueError
-    with pytest.raises(ValueError):
-        Config(ununused_kwarg="I dont belong :(", check_unused=True)
-
-
diff --git a/pyatoa/tests/test_data/baseline_images/default_manager_plot_both.png b/pyatoa/tests/test_data/baseline_images/default_manager_plot_both.png
index 2706b1a4..4fc8d65b 100644
Binary files a/pyatoa/tests/test_data/baseline_images/default_manager_plot_both.png and b/pyatoa/tests/test_data/baseline_images/default_manager_plot_both.png differ
diff --git a/pyatoa/tests/test_data/baseline_images/default_manager_plot_wav.png b/pyatoa/tests/test_data/baseline_images/default_manager_plot_wav.png
index c9090b65..a3cfd002 100644
Binary files a/pyatoa/tests/test_data/baseline_images/default_manager_plot_wav.png and b/pyatoa/tests/test_data/baseline_images/default_manager_plot_wav.png differ
diff --git a/pyatoa/tests/test_gatherer.py b/pyatoa/tests/test_gatherer.py
deleted file mode 100644
index ea88b5a1..00000000
--- a/pyatoa/tests/test_gatherer.py
+++ /dev/null
@@ -1,186 +0,0 @@
-"""
-Test the functionalities of the Pyatoa Gatherer class
-"""
-import pytest
-from obspy import read_events
-from pyasdf import ASDFDataSet
-from pyatoa import Config
-from pyatoa.core.gatherer import Gatherer, GathererNoDataException
-
-
-@pytest.fixture
-def code():
-    """
-    Example NZ station code
-    """
-    return "NZ.BFZ.??.HH*"
-
-
-@pytest.fixture
-def event_id():
-    """
-    Example NZ event identifier
-    """
-    return "2018p130600"
-
-
-@pytest.fixture
-def dataset_fid():
-    """
-    The name for the dataset file id used for temporary storage
-    :return:
-    """
-    return "./test_data/test_ASDFDataSet.h5"
-
-
-@pytest.fixture
-def cat():
-    """
-    ObsPy Event Catalog for New Zealand based event with
-    GeoNet Event ID: 2018p130600
-    """
-    return read_events("./test_data/test_catalog_2018p130600.xml")
-
-
-@pytest.fixture
-def event(cat):
-    """
-    Event from Catalog
-    """
-    return cat[0]
-
-
-@pytest.fixture
-def origintime(event):
-    """
-    The origin time of the example event
-    :return:
-    """
-    return event.preferred_origin().time
-
-
-@pytest.fixture
-def config(event_id):
-    """
-    Default Pyatoa Config object
-    """
-    return Config(event_id=event_id, iteration=1, step_count=0,
-                  save_to_ds=False)
-
-
-@pytest.fixture
-def gatherer(config, origintime):
-    """
-    The Gatherer which is responsible for gathering data.
-    """
-    return Gatherer(config=config, origintime=origintime)
-
-
-def test_asdf_event_fetch(gatherer, dataset_fid):
-    """
-    Get event from an ASDFDataSet.
-    """
-    with ASDFDataSet(dataset_fid) as ds:
-        gatherer.ds = ds
-        assert gatherer.fetch_event_from_dataset() is not None
-
-
-def test_asdf_station_fetch(gatherer, dataset_fid, code):
-    """
-    Get station from an ASDFDataSet
-    """
-    with ASDFDataSet(dataset_fid) as ds:
-        gatherer.ds = ds
-        inv = gatherer.fetch_inv_from_dataset(code)
-        assert len(inv[0][0]) == 3
-
-
-def test_asdf_waveform_fetch(gatherer, dataset_fid, code, config):
-    """
-    Get waveforms from an ASDFDataSet
-    """
-    with ASDFDataSet(dataset_fid) as ds:
-        gatherer.ds = ds
-        for tag in [config.observed_tag, config.synthetic_tag]:
-            st = gatherer.fetch_waveform_from_dataset(code, tag)
-            assert len(st) == 3
-
-
-def test_fetch_event_by_dir(gatherer, event_id):
-    """
-    Get event information based on given directory structure. Test the various
-    types of input sources that are allowable by Pyatoa
-    """
-    # No Config path means fetching returns nada
-    assert gatherer.fetch_event_by_dir(event_id) is None
-
-    gatherer.config.paths["events"] = "./test_data/"
-
-    prefixes = ["test_CMTSOLUTION_", "test_FORCESOLUTION_", "test_SOURCE_"]
-
-    # Test each type of available source from SPECFEM2D and SPECFEM3D
-    for prefix in prefixes:
-        event = gatherer.fetch_event_by_dir(event_id=event_id, prefix=prefix)
-
-        assert event is not None
-        assert event_id in event.resource_id.id
-
-
-def test_fetch_inv_by_dir(gatherer, code):
-    """
-    Get response based on given directory structure
-    """
-    # No Config path means fetching returns nada
-    assert gatherer.fetch_inv_by_dir(code) is None
-
-    gatherer.config.paths["responses"] = "./test_data/test_seed"
-    inv = gatherer.fetch_inv_by_dir(code)
-    assert inv is not None
-    assert hasattr(inv[0][0][0], "response")
-
-
-def test_fetch_observed_by_dir(gatherer, code):
-    """
-    Get waveforms based on given directory strucutre
-    """
-    assert gatherer.fetch_observed_by_dir(code) is None
-
-    gatherer.config.paths["waveforms"] = "./test_data/test_mseeds"
-    st = gatherer.fetch_observed_by_dir(code)
-    assert st is not None
-    assert len(st) == 3
-
-
-def test_fetch_synthetic_by_dir(gatherer, code):
-    """
-    Get synthetics based on given directory strucutre
-    """
-    assert gatherer.fetch_synthetic_by_dir(code) is None
-
-    gatherer.config.paths["synthetics"] = "./test_data/synthetics"
-    st = gatherer.fetch_synthetic_by_dir(code)
-    assert st is not None
-    assert len(st) == 3
-
-    # Testing out the synthetics only feature where obs data is searched for
-    # using synthetic data structure
-    assert gatherer.fetch_synthetic_by_dir(
-        code, syn_cfgpath="waveforms") is None
-    gatherer.config.paths["waveforms"] = "./test_data/synthetics"
-    st = gatherer.fetch_synthetic_by_dir(code, syn_cfgpath="waveforms")
-    assert st is not None
-    assert len(st) == 3
-
-
-def test_gather_event(gatherer, dataset_fid):
-    """
-    Ensure gatherer can get an event from the correct sources
-    """
-    with pytest.raises(GathererNoDataException):
-        gatherer.gather_event()
-
-    with ASDFDataSet(dataset_fid) as ds:
-        gatherer.ds = ds
-        gatherer.Client = None
-        assert gatherer.gather_event() is not None
-
diff --git a/pyatoa/tests/test_manager.py b/pyatoa/tests/test_manager.py
index d08b6a07..2f873edf 100644
--- a/pyatoa/tests/test_manager.py
+++ b/pyatoa/tests/test_manager.py
@@ -3,9 +3,8 @@
 """
 import pytest
 import os
-import json
-import numpy as np
 from pyasdf import ASDFDataSet
+from pyadjoint import get_config as get_pyadjoint_config
 from pyatoa import Config, Manager, logger
 from pyatoa.core.manager import ManagerError
 from obspy import read, read_events, read_inventory
@@ -64,7 +63,7 @@ def config():
     """
     Default Pyatoa Config object
     """
-    return Config(event_id="2018p130600", client="GEONET")
+    return Config(event_id="2018p130600")
 
 
 # @pytest.fixture
@@ -103,7 +102,7 @@ def mgmt_post(mgmt_pre):
     A manager that has completed the full workflow
     """
     mgmt_pre.standardize()
-    mgmt_pre.preprocess()
+    mgmt_pre.preprocess(remove_response=True, output="DISP")
 
     mgmt_pre.window()
     mgmt_pre.measure()
@@ -114,19 +113,20 @@ def test_read_write_from_asdfdataset(tmpdir, mgmt_pre, config):
     """
     Write a Manager into an ASDFDataSet and then read it back
     """
-    with ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5")) as ds:
-        mgmt_pre.ds = ds
-        mgmt_pre.write()
+    ds = ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5"))
+    mgmt_pre.write_to_dataset(ds=ds)
+
+    # Load data back from dataset
+    mgmt_loaded = Manager(ds=ds, config=config)
+    mgmt_loaded.load("NZ.BFZ", path="default")
 
-        # Load data back from dataset
-        mgmt_loaded = Manager(ds=ds, config=config)
-        mgmt_loaded.load("NZ.BFZ", path="default")
+    # Manager has no equivalence representation so just check some ids
+    assert(mgmt_pre.stats.event_id == mgmt_loaded.stats.event_id)
+    assert(mgmt_pre.stats.len_obs == mgmt_loaded.stats.len_obs)
+    assert(mgmt_pre.stats.len_syn == mgmt_loaded.stats.len_syn)
+    assert(mgmt_pre.stats.inv_name == mgmt_loaded.stats.inv_name)
 
-        # Manager has no equivalence represenation so just check some ids
-        assert(mgmt_pre.stats.event_id == mgmt_loaded.stats.event_id)
-        assert(mgmt_pre.stats.len_obs == mgmt_loaded.stats.len_obs)
-        assert(mgmt_pre.stats.len_syn == mgmt_loaded.stats.len_syn)
-        assert(mgmt_pre.stats.inv_name == mgmt_loaded.stats.inv_name)
+    del ds
 
 
 def test_standardize_to_synthetics(mgmt_pre):
@@ -193,36 +193,15 @@ def test_preprocess_rotate_to_rtz(mgmt_pre):
     assert(float(f"{mgmt_pre.baz:.2f}") == 3.21)
 
 
-def test_preprocess_overwrite(mgmt_pre):
-    """
-    Apply an overwriting preprocessing function to ensure functionality works
-    """
-    # First ensure that only functions can be passed
-    with pytest.raises(AssertionError):
-        mgmt_pre.preprocess(overwrite="not a function")
-
-    def preproc_fx(st, choice, value=1, **kwargs):
-        """Zero out the data for an easy check on results"""
-        for tr in st:
-            tr.data *= value
-        return st
-
-    mgmt_pre.preprocess(overwrite=preproc_fx, value=0)
-
-    for tr in mgmt_pre.st:
-        assert(not tr.data.any())
-
-
 def test_select_window(mgmt_pre):
     """
     Ensure windows functionality works as advertised
     """
-    assert(mgmt_pre.config.pyflex_preset == "default")
-
     # Check that error is raised if insufficient workflow progress
     with pytest.raises(ManagerError):
         mgmt_pre.window()
 
-    mgmt_pre.standardize().preprocess().window()
+    mgmt_pre.standardize().preprocess(
+        remove_response=True, output="DISP").window()
 
     # Ensure the correct number of windows are chosen
     for comp, nwin in {"N": 1, "E": 1}.items():
@@ -236,39 +215,41 @@ def test_save_and_retrieve_windows(tmpdir, mgmt_post):
     recalculated but since the waveforms are the same, the values will be
     the same as before.
     """
-    with ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5")) as ds:
-        mgmt_post.ds = ds
-        # Explicitely set the model and step count
-        mgmt_post.config.iteration = 0
-        mgmt_post.config.step_count = 0
-        mgmt_post.config.save_to_ds = True
-        saved_windows = mgmt_post.windows
-        mgmt_post.save_windows()  # saved to path 'm00/s00'
-
-        # Delete windows, iterate step, retrieve fixed windows
-        mgmt_post.windows = None
-        mgmt_post.config.step_count += 1
-        mgmt_post.window(fix_windows=True)
-
-        # Just check some parameter for each window to make sure all goods
-        for comp in mgmt_post.windows:
-            for w, window in enumerate(mgmt_post.windows[comp]):
-                for attr in ["left", "right", "cc_shift"]:
-                    assert(getattr(window, attr) ==
-                           getattr(saved_windows[comp][w], attr))
-
-        # Delete windows, slightly change synthetic waveforms and check to make
-        # sure that recalculated criteria are different
-        mgmt_post.windows = None
-        for tr in mgmt_post.st_syn:
-            tr.data *= 2
-        mgmt_post.window(fix_windows=True)
-
-        for comp in mgmt_post.windows:
-            for w, window in enumerate(mgmt_post.windows[comp]):
-                # Amplitude ratios will be different since we multipled them
-                assert (getattr(window, "dlnA") !=
-                        getattr(saved_windows[comp][w], "dlnA"))
+    ds = ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5"))
+
+    mgmt_post.ds = ds
+    # Explicitly set the model and step count
+    mgmt_post.config.iteration = 0
+    mgmt_post.config.step_count = 0
+    saved_windows = mgmt_post.windows
+    mgmt_post.write_to_dataset(choice=["windows"])  # saved to path 'm00/s00'
+
+    # Delete windows, iterate step, retrieve fixed windows
+    mgmt_post.windows = None
+    mgmt_post.config.step_count += 1
+    mgmt_post.window(fix_windows=True)
+
+    # Just check some parameter for each window to make sure all is good
+    for comp in mgmt_post.windows:
+        for w, window in enumerate(mgmt_post.windows[comp]):
+            for attr in ["left", "right", "cc_shift"]:
+                assert(getattr(window, attr) ==
+                       getattr(saved_windows[comp][w], attr))
+
+    # Delete windows, slightly change synthetic waveforms and check to make
+    # sure that recalculated criteria are different
+    mgmt_post.windows = None
+    for tr in mgmt_post.st_syn:
+        tr.data *= 2
+    mgmt_post.window(fix_windows=True)
+
+    for comp in mgmt_post.windows:
+        for w, window in enumerate(mgmt_post.windows[comp]):
+            # Amplitude ratios will be different since we multiplied them
+            assert (getattr(window, "dlnA") !=
+                    getattr(saved_windows[comp][w], "dlnA"))
+
+    del ds
 
 
 def test_save_adjsrcs(tmpdir, mgmt_post):
@@ -276,10 +257,12 @@ def test_save_adjsrcs(tmpdir, mgmt_post):
     Checks that adjoint sources can be written to dataset and will match
     the formatting required by Specfem3D
     """
-    with ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5")) as ds:
-        mgmt_post.ds = ds
-        mgmt_post.save_adjsrcs()
-        assert(hasattr(ds.auxiliary_data.AdjointSources.default, "NZ_BFZ_BXN"))
+    ds = ASDFDataSet(os.path.join(tmpdir, "test_dataset.h5"))
+    mgmt_post.ds = ds
+    mgmt_post.write_to_dataset(choice=["adjsrcs"])
+    assert(hasattr(ds.auxiliary_data.AdjointSources.default, "NZ_BFZ_BXN"))
+
+    del ds
 
 
 def test_format_windows(mgmt_post):
@@ -297,15 +280,55 @@ def test_format_windows(mgmt_post):
     for value in values:
         assert(isinstance(value, float))
 
+
 def test_flow_multiband(mgmt_pre):
     """
     Test that the workflow for multiple period bands returns a single
     adjoint source
     """
-    windows, adjsrcs = mgmt_pre.flow_multiband(
-        periods=[(1, 10), (10, 30), (15, 40)]
-    )
+    mgmt_pre.flow_multiband(periods=[(1, 10), (10, 30), (15, 40)],
+                            remove_response=True, output="DISP")
+
+    # Just check that the expected values don't change
-    assert(pytest.approx(adjsrcs["E"].max(), .001) == 8914.48)
-    assert(pytest.approx(adjsrcs["N"].max(), .001) == 3173.05)
-    assert(pytest.approx(adjsrcs["Z"].max(), .001) == 2749.96)
+    assert(pytest.approx(mgmt_pre.adjsrcs["E"].misfit, .001) == 0.33739)
+    assert(pytest.approx(mgmt_pre.adjsrcs["N"].misfit, .001) == 0.52064)
+    assert(pytest.approx(mgmt_pre.adjsrcs["Z"].misfit, .001) == 0.29031)
+
+
+def test_resample_numerical_noise(mgmt_pre):
+    """
+    Raised in Issue #34 by Ridvan O. (rdno).
+
+    Numerical noise can be introduced by resampling a trace that already has
+    the correct sampling rate, which leads to non-zero adjoint sources/misfit
+    when traces are identical due to very slight time shifts introduced from
+    the resampling method.
+
+    This check ensures the fix (do not resample if sampling rates are the same)
+    continues to work.
+    """
+    # Ensure that both traces are the same
+    mgmt_pre.st_obs = mgmt_pre.st_syn.copy()
+
+    # Using synthetic data
+    mgmt_pre.config.st_obs_type = "syn"
+
+    # Take some parameters from the Issue
+    mgmt_pre.config.pyadjoint_config = get_pyadjoint_config(
+        adjsrc_type="waveform", min_period=20.,
+        max_period=100.
+    )
+
+    mgmt_pre.standardize()
+    mgmt_pre.preprocess()
+    mgmt_pre.measure()  # skip windowing as we get the same result without
+
+    # mgmt_pre.plot(choice="wav")  # creates the figure shown in Issue #34
+
+    # adjoint sources should be zero because these are the same trace
+    for comp, adjsrc in mgmt_pre.adjsrcs.items():
+        assert(adjsrc.adjoint_source.max() == 0)
+        assert(adjsrc.adjoint_source.min() == 0)
+
+
diff --git a/pyatoa/tests/test_mgmt_plot.py b/pyatoa/tests/test_mgmt_plot.py
index dad5a424..ed5fdadf 100644
--- a/pyatoa/tests/test_mgmt_plot.py
+++ b/pyatoa/tests/test_mgmt_plot.py
@@ -22,7 +22,7 @@ def mgmt():
     """
     mgmt = Manager()
     mgmt.load()
-    mgmt.flow()
+    mgmt.flow(remove_response=True, output="DISP")
 
     return mgmt
 
diff --git a/pyatoa/tests/test_process_util.py b/pyatoa/tests/test_process_util.py
index fb605014..d865241c 100644
--- a/pyatoa/tests/test_process_util.py
+++ b/pyatoa/tests/test_process_util.py
@@ -78,10 +78,14 @@ def test_filters(st_obs):
     min_period = 10
     max_period = 30
     outputs = [
-        process.filters(st_obs, min_period=min_period, max_period=max_period),
-        process.filters(st_obs, min_period=min_period, min_freq=1/max_period),
-        process.filters(st_obs, max_freq=1/min_period, min_freq=1/max_period),
-        process.filters(st_obs, max_freq=1 / min_period, max_period=max_period)
+        process.apply_filter(st_obs, min_period=min_period,
+                             max_period=max_period),
+        process.apply_filter(st_obs, min_period=min_period,
+                             min_freq=1/max_period),
+        process.apply_filter(st_obs, max_freq=1/min_period,
+                             min_freq=1/max_period),
+        process.apply_filter(st_obs, max_freq=1 / min_period,
+                             max_period=max_period)
     ]
     check_result = outputs[0][0].data
     for output in outputs:
diff --git a/pyatoa/tests/test_utils.py b/pyatoa/tests/test_utils.py
index ba541117..9d1b476d 100644
--- a/pyatoa/tests/test_utils.py
+++ b/pyatoa/tests/test_utils.py
@@ -7,12 +7,11 @@
 import pytest
 import numpy as np
 from glob import glob
-from pysep.utils.io import read_stations
 from obspy import UTCDateTime, read_inventory
 from obspy.core.util.testing import streams_almost_equal
 from pyasdf import ASDFDataSet
-from pyatoa.utils import (adjoint, calculate, form, images, read, srcrcv,
-                          window, write)
+from pyatoa.utils import (adjoint, calculate, form, images, srcrcv, window,
+                          write)
 
 
 @pytest.fixture
@@ -117,21 +116,12 @@ def test_form_utils():
         "quakeml:earthquake.usgs.gov/fdsnws/event/1/query?eventid={eid}"
         "&format=quakeml",  # USGS
         "quakeml:us.anss.org/event/{eid}",  # USGS COMCAT,
-        "smi:local/source/{eid}/source"
     ]
     for test_case in test_cases:
         test_case = test_case.format(eid=eid)
         assert(form.format_event_name(test_case) == eid)
 
 
-# ============================= TEST IMAGE UTILS ===============================
-def test_images_utils():
-    """
-    Test image processing utilities
-    """
-    # !!! TO DO: add .pdf and .png files to test data to test this functionality
-    pass
-
 # ============================= TEST READ/ WRITE UTILS =========================
 
 
@@ -142,13 +132,11 @@ def test_write_utils(tmpdir, station_fid, sem_fid, ds):
     """
     # Test write_inv_seed with edited dir structure
    # Mess with the default template naming schema and see if it returns goodly
-    inv = read_stations(station_fid)
-    write.write_inv_seed(inv, path=tmpdir, dir_structure="{net}+{sta}",
-                         file_template="TEST-{loc}.{cha}_{sta}-{net}",
-                         components="TZR", channel_code="BX{comp}")
+    inv = read_inventory()
+    write.write_inv_seed(inv, path=tmpdir)
 
-    check_fids = ["NZ+BFZ/TEST-.BXR_BFZ-NZ", "NZ+BFZ/TEST-.BXT_BFZ-NZ",
-                  "NZ+BFZ/TEST-.BXZ_BFZ-NZ"]
+    check_fids = ["FUR.GR/RESP.GR.FUR..BHE", "RJOB.BW/RESP.BW.RJOB..EHZ",
+                  "WET.GR/RESP.GR.WET..LHN"]
     for check_fid in check_fids:
         assert(os.path.exists(os.path.join(tmpdir, check_fid)))
diff --git a/pyatoa/tests/test_wave_maker.py b/pyatoa/tests/test_wave_maker.py
index f5e50c5e..223c201b 100644
--- a/pyatoa/tests/test_wave_maker.py
+++ b/pyatoa/tests/test_wave_maker.py
@@ -65,7 +65,7 @@ def config():
     """
     Default Pyatoa Config object
     """
-    return Config(event_id="2018p130600", client="GEONET")
+    return Config(event_id="2018p130600")
 
 
 @pytest.fixture
@@ -76,7 +76,7 @@ def mgmt(config, event, st_obs, st_syn, inv):
     mgmt = Manager(config=config, event=event, st_obs=st_obs, st_syn=st_syn,
                    inv=inv)
     mgmt.standardize()
-    mgmt.preprocess()
+    mgmt.preprocess(remove_response=True, output="DISP")
 
     mgmt.window()
     mgmt.measure()
     return mgmt
diff --git a/pyatoa/utils/form.py b/pyatoa/utils/form.py
index 7f0216b3..50190243 100644
--- a/pyatoa/utils/form.py
+++ b/pyatoa/utils/form.py
@@ -6,12 +6,39 @@
 to keep everything playing nice.
 
 Functions here will aid in reshaping data into the correct formats.
 """
-import os
 from pyasdf import ASDFDataSet
-from pysep.utils.mt import Source
 from obspy.core.event import Event
 
 
+def channel_code(dt):
+    """
+    Specfem outputs seismograms with channel band codes set by IRIS.
+    Instrument codes are always X for synthetics, but the band code will vary
+    with the sampling rate of the data; return the correct code for a given
+    sampling interval. Taken from Appendix B of the Specfem3D cartesian
+    manual (June 15, 2018)
+
+    :type dt: float
+    :param dt: sampling interval of the data in seconds
+    :rtype: str
+    :return: band code as specified by IRIS
+    :raises KeyError: when dt is specified incorrectly
+    """
+    if dt >= 1:
+        return "L"  # long period
+    elif 0.1 < dt < 1:
+        return "M"  # mid period
+    elif 0.0125 < dt <= 0.1:
+        return "B"  # broad band
+    elif 0.004 < dt <= 0.0125:
+        return "H"  # high broad band
+    elif 0.001 < dt <= 0.004:
+        return "C"  # very high broad band
+    elif 0.0002 < dt <= 0.001:
+        return "F"  # extremely high broad band
+    else:
+        raise KeyError("Channel code does not exist for this value of 'dt'")
+
+
 def format_iter(iteration):
     """
     Format the iteration to be used in internal path naming and labelling.
@@ -71,7 +98,7 @@ def format_event_name(ds_or_event):
     :return: the event name to be used for naming schema in the workflow
     """
     # Extract the resource id dependent on the input file type
-    if isinstance(ds_or_event, (Event, Source)):
+    if isinstance(ds_or_event, Event):
         rid = ds_or_event.resource_id.id
     elif isinstance(ds_or_event, str):
         rid = ds_or_event
@@ -100,44 +127,8 @@ def format_event_name(ds_or_event):
     # USGS ANSS ComCat: quakeml:us.anss.org/event/20005ysu
     elif "ANSS" in rid_up:
         return rid.split("event/")[-1]
-    # SOURCE pyatoa.read.read_specfem2d_source: pyatoa:source/*/event
-    elif "SOURCE" in rid_up:
-        return rid.split("/")[-2]
-    # PYATOA pyatoa.read.read_force_solution: pyatoa:source/
-    elif "PYATOA" in rid_up:
-        return rid.split("source/")[-1]
     else:
         raise NotImplementedError(f"Unexpected resource id format '{rid}', "
                                   "Please raise a GitHub issue and the "
                                   "developers will address this")
-
-
-def convert_stations_to_seed(stations_file="./STATIONS",
-                             path_to_save_seed="./seed", **kwargs):
-    """
-    A convenience function to format a SPECFEM file into SeisFlows3 ready files.
-    Convert a SPECFEM STATIONS file into a directory of SEED formatted
-    StationXML files which are REQUIRED by a Pyatoa + SeisFlows3 workflow.\
-
-    Kwargs are passed to pyatoa.utils.write.write_inv_seed()
-    See above function for available options on writing SEED StationXML files
-
-    :type stations_file: str
-    :param stations_file: path to the STATIONS file defined in SPECFEM format
-    :type path_to_save_seed: str
-    :param path_to_save_seed: path to save the output SEED formatted
-        StationXML files
-    """
-    # Late imports to avoid circular ImportErrors
-    from pyatoa.utils.read import read_stations
-    from pyatoa.utils.write import write_inv_seed
-
-    if not os.path.exists(path_to_save_seed):
-        os.mkdir(path_to_save_seed)
-
-    # Read SPECFEM Stations file
-    inv = read_stations(stations_file)
-
-    # Write inventory as a collection of StationXML files
-    write_inv_seed(inv=inv, path=path_to_save_seed, **kwargs)
-
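As a quick sanity check on the band-code ranges of the `channel_code` helper added to `pyatoa/utils/form.py` above, a minimal sketch (the `dt` values are illustrative):

```python
# Band codes returned by channel_code for a few sampling intervals;
# dt values are illustrative only
from pyatoa.utils.form import channel_code

for dt in [2.0, 0.5, 0.05, 0.01, 0.002, 0.0005]:
    print(f"dt={dt} s -> band code '{channel_code(dt)}'")
# Expected output order: L, M, B, H, C, F
```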
diff --git a/pyatoa/utils/process.py b/pyatoa/utils/process.py
index 8db51e22..60cfe0a2 100644
--- a/pyatoa/utils/process.py
+++ b/pyatoa/utils/process.py
@@ -8,102 +8,8 @@
 from pyatoa import logger
 
 
-def default_process(st, choice, inv=None, baz=None, min_period=None,
-                    max_period=None, unit_output=None, half_dur=None,
-                    rotate_to_rtz=False, apply_filter=True,
-                    remove_response=True, convolve_with_stf=True, **kwargs):
-    """
-    Generalized, default preprocessing function to process waveform data.
-    Preprocessing is slightly different for obs and syn waveforms. Each
-    processing function is split into a separate function so that they can
-    be called by custom preprocessing functions.
-
-    :type st: obspy.core.stream.Stream
-    :param st: Stream object to be preprocessed
-    :type inv: obspy.core.inventory.Inventory
-    :choice inv: Inventory containing response information for real waveform
-        data. If not provided, reseponse will not be removed. Also used for
-        stream rotation from ZNE -> RTZ
-    :type choice: str
-    :param choice: 'obs' or 'syn' for observed or synthetic waveform. 'obs' will
-        attempt to remove response information with `inv`. 'syn' will attempt
-        to convolve with a half duration.
-    :type remove_response: bool
-    :param remove_response: flag, remove instrument response from choice=='obs'
-        data using the provided `inv`. Defaults to True
-    :type apply_filter: bool
-    :param apply_filter: flag, filter the waveforms using the
-        `min_period` and `max_period` parameters. Defaults to True
-    :type convolve_with_stf: bool
-    :param convolve_with_stf: flag, convolve choice=='syn' data with a Gaussian
-        source time function if a `half_dur` (half duration) is provided.
-        Defaults to true
-    :type rotate_to_rtz: bool
-    :param rotate_to_rtz: flag, use the `rotate_baz` variable to rotate streams
-        from ZNE components to RTZ
-    :rtype: obspy.core.stream.Stream
-    :return: preprocessed stream object pertaining to `choice`
-
-    Keyword Arguments
-    ::
-        int water_level:
-            water level for response removal
-        float taper_percentage:
-            amount to taper ends of waveform
-    """
-    filter_corners = kwargs.get("filter_corners", 2)
-    water_level = kwargs.get("water_level", 60)
-    taper_percentage = kwargs.get("taper_percentage", 0.05)
-    zerophase = kwargs.get("zerophase", True)
-
-    assert choice in ["obs", "syn"], f"preprocess `choice` must be 'obs', 'syn'"
-
-    if is_preprocessed(st):
-        logger.info("`st` already preprocessed, skipping")
-        return st
-    else:
-        st_out = st.copy()
-
-    # Get rid of any long period trends that may affect that data
-    st_out.detrend("simple").detrend("demean").taper(taper_percentage)
-
-    if choice == "obs" and remove_response:
-        logger.info(f"remove response, units: {unit_output}")
-        st_out.remove_response(inventory=inv, output=unit_output,
-                               water_level=water_level, plot=False)
-
-        # Rotate streams if not in ZNE, e.g. Z12. Only necessary for observed
-        logger.info("rotate -> ZNE")
-        st_out.rotate(method="->ZNE", inventory=inv,
-                      components=["ZNE", "Z12", "123"])
-        st_out.detrend("simple").detrend("demean").taper(taper_percentage)
-    else:
-        logger.info(f"skip remove response (no `inv`)")
-
-    # Rotate the given stream from standard NEZ to RTZ if BAz given
-    if rotate_to_rtz and baz is not None:
-        logger.info(f"rotate NE->RT by {baz} degrees")
-        st_out.rotate(method="NE->RT", back_azimuth=baz)
-
-    # Filter data based on the given period bounds
-    if apply_filter and (min_period is not None or max_period is not None):
-        st_out = filters(st_out, min_period=min_period, max_period=max_period,
-                         corners=filter_corners, zerophase=zerophase
-                         )
-        st_out.detrend("simple").detrend("demean").taper(taper_percentage)
-    else:
-        logger.info(f"no filter applied to data")
-
-    # Convolve synthetic data with a Gaussian source time function
-    if choice == "syn":
-        if convolve_with_stf and half_dur is not None:
-            st_out = stf_convolve(st=st_out, half_duration=half_dur)
-
-    return st_out
-
-
-def filters(st, min_period=None, max_period=None, min_freq=None, max_freq=None,
-            corners=2, zerophase=True, **kwargs):
+def apply_filter(st, min_period=None, max_period=None, min_freq=None,
+                 max_freq=None, corners=2, zerophase=True, **kwargs):
     """
     Choose the appropriate filter depending on the ranges given.
     Either periods or frequencies can be given. Periods will be prioritized.
@@ -151,7 +57,7 @@ def apply_filter(st, min_period=None, max_period=None, min_freq=None,
         st.filter("bandpass", corners=corners, zerophase=zerophase,
                   freqmin=min_freq, freqmax=max_freq, **kwargs)
         logger.info(f"bandpass filter: {min_period} - {max_period}s w/ "
-                    f"{corners} corners")
+                    f"{corners} corners")
 
     # Minimum period only == lowpass filter
     elif min_period:
@@ -350,6 +256,55 @@ def match_npts(st_a, st_b, force=None):
     return st_change, st_const
 
 
+def normalize(st_a, st_b, choice):
+    """
+    Normalize amplitudes in traces to the absolute maximum amplitude of the
+    other, or normalize all traces so that their maximum value is a given
+    value.
+
+    :type st_a: obspy.stream.Stream
+    :param st_a: first stream to consider in the normalization
+    :type st_b: obspy.stream.Stream
+    :param st_b: second stream to consider in the normalization
+    :type choice: str or int or float
+    :param choice: choose which amplitude to normalize to, options:
+        'a': scale the traces of `st_b` to the max amplitudes of `st_a`
+        'b': scale the traces of `st_a` to the max amplitudes of `st_b`
+        value: scale both streams so that their max amplitude equals the
+        given value, e.g., 1
+    :rtype: tuple (obspy.stream.Stream, obspy.stream.Stream)
+    :return: streams that have been normalized based on `choice`
+    :raises NotImplementedError: If an incorrect value of `choice` is provided
+    """
+    if isinstance(choice, str):
+        if choice == "a":
+            st_const = st_a.copy()
+            st_change = st_b.copy()
+        elif choice == "b":
+            st_change = st_a.copy()
+            st_const = st_b.copy()
+        else:
+            raise NotImplementedError("normalize `choice` must be 'a' or 'b'")
+
+        for tr_const, tr_change in zip(st_const, st_change):
+            tr_change.data *= (abs(tr_const.max()) / abs(tr_change.max()))
+
+        if choice == "a":
+            st_a_out = st_const
+            st_b_out = st_change
+        else:
+            st_a_out = st_change
+            st_b_out = st_const
+    elif isinstance(choice, (int, float)):
+        st_a_out = st_a.copy()
+        st_b_out = st_b.copy()
+
+        for tr_a, tr_b in zip(st_a_out, st_b_out):
+            tr_a.data *= (choice / abs(tr_a.max()))
+            tr_b.data *= (choice / abs(tr_b.max()))
+    else:
+        raise NotImplementedError(f"invalid choice {choice} for normalization")
+
+    return st_a_out, st_b_out
+
+
 def is_preprocessed(st, filter_only=True):
     """
     Check to make sure a stream object has not yet been run through
@@ -427,14 +382,13 @@ def stf_convolve(st, half_duration, source_decay=4., time_shift=None,
     time_offset_in_samp = int(time_offset * sampling_rate)
 
     # convolve each trace with the soure time function and time shift if needed
-    st_out = st.copy()
-    for tr in st_out:
+    for tr in st:
         if time_shift:
             tr.stats.starttime += time_shift
 
         data_out = np.convolve(tr.data, gaussian_stf, mode="same")
         tr.data = data_out
 
-    return st_out
+    return st
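A short sketch of the renamed and newly added processing utilities above; ObsPy's packaged demo stream stands in for real data here:

```python
# Sketch using the renamed `apply_filter` and new `normalize` helpers;
# ObsPy's example stream is a stand-in for real data
from obspy import read
from pyatoa.utils.process import apply_filter, normalize

st_a = read()  # ObsPy demo stream
st_b = read()

# Periods take priority over frequencies; both bounds given -> bandpass
st_a = apply_filter(st_a, min_period=1, max_period=10)
st_b = apply_filter(st_b, min_period=1, max_period=10)

# Scale st_b's traces to st_a's max amplitudes ('a'), or pass a number to
# scale both streams to that absolute maximum
st_a, st_b = normalize(st_a, st_b, choice="a")
st_a, st_b = normalize(st_a, st_b, choice=1)
```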
Since the STATIONS - file only provides NET and STA information, the user must provide the - location and channel information, which can be wildcards. - - :type path_to_stations: str - :param path_to_stations: full path to the STATIONS file - :type loc: str - :param loc: formatting of the location section of the code, defaults to - '??' two-digit wildcard - :type cha: str - :param cha: formatting of the channel section fo the code, defaults to - 'HH?' for wildcard component of a high-gain seismometer. Follows SEED - convention (see IRIS). - :type seed_template: str - :param seed_template: string template to be formatted with some combination - of 'net', 'sta', 'loc' and 'cha', used for generating station codes - :rtype: list of str - :return: list of codes to be used by the Manager or Pyaflowa classes for - data gathering and processing - """ - codes = [] - stations = np.loadtxt(path_to_stations, dtype="str") - if stations.size == 0: - return codes - - # Deal with the special case where the stations file is only 1 station long - # otherwise we end up iterating over a string and not an ndarray - if not isinstance(stations[0], np.ndarray): - stations = [stations] - - for station in stations: - sta = station[0] - net = station[1] - codes.append(seed_template.format(net=net, sta=sta, loc=loc, cha=cha)) - - return codes - - diff --git a/pyatoa/utils/write.py b/pyatoa/utils/write.py index df0838e6..d8715fb4 100644 --- a/pyatoa/utils/write.py +++ b/pyatoa/utils/write.py @@ -2,10 +2,8 @@ For writing various output files used by Pyatoa, Specfem and Seisflows """ import os -import glob import numpy as np from obspy.core.inventory.channel import Channel -from pysep.utils.fmt import channel_code from pyatoa import logger from pyatoa.utils.form import format_event_name, format_iter, format_step @@ -34,9 +32,10 @@ def write_inv_seed(inv, path="./", dir_structure="{sta}.{net}", :type file_template: str :param file_template: template for file naming, likely should not change from default template - :type channels: str - :param channels: OPTIONAL, if inventory does not contain channels (e.g., - if read from a SPECFEM STATIONS file), channels will be generated here. + :type components: str + :param components: OPTIONAL, if inventory does not contain components (e.g., + if read from a SPECFEM STATIONS file), components will be filled in + automatically. :type channel_code: str :param channel_code: Explicitely defined default channel values for generating channels on the fly when none are provided by the inventory diff --git a/pyatoa/visuals/comp_wave.py b/pyatoa/visuals/comp_wave.py index aab645de..65c326a4 100644 --- a/pyatoa/visuals/comp_wave.py +++ b/pyatoa/visuals/comp_wave.py @@ -90,7 +90,6 @@ def _gather_model_from_dataset(self, dsfid, model=None, init_or_final=None, # Use the Manager class to load in waveform data mgmt = Manager(ds=ds) mgmt.load(code=self.station, path=model) - mgmt.config.save_to_ds = False # !!! 
diff --git a/pyatoa/visuals/comp_wave.py b/pyatoa/visuals/comp_wave.py
index aab645de..65c326a4 100644
--- a/pyatoa/visuals/comp_wave.py
+++ b/pyatoa/visuals/comp_wave.py
@@ -90,7 +90,6 @@ def _gather_model_from_dataset(self, dsfid, model=None, init_or_final=None,
         # Use the Manager class to load in waveform data
         mgmt = Manager(ds=ds)
         mgmt.load(code=self.station, path=model)
-        mgmt.config.save_to_ds = False
 
         # !!! NZ Temp network skip remove response
         net, sta = self.station.split(".")
diff --git a/pyatoa/visuals/improve_wave.py b/pyatoa/visuals/improve_wave.py
index 2baeba39..3c60a618 100644
--- a/pyatoa/visuals/improve_wave.py
+++ b/pyatoa/visuals/improve_wave.py
@@ -109,7 +109,6 @@ def gather(self, sta, min_period, max_period, rotate_to_rtz=False,
             mgmt.load(sta, path)
 
             # Overwrite some config parameters
-            mgmt.config.save_to_ds = False
             mgmt.config.min_period = min_period
             mgmt.config.max_period = max_period
             if rotate_to_rtz:
@@ -166,7 +165,6 @@ def gather_simple(self, event, sta, min_period, max_period, path_dict=None,
         with asdf(ds_fid, mode="r") as ds:
             mgmt = Manager(ds=ds)
             mgmt.load(sta, path)
-            mgmt.config.save_to_ds = False
             mgmt.config.min_period = min_period
             mgmt.config.max_period = max_period
             mgmt.standardize().preprocess()
diff --git a/pyatoa/visuals/map_maker.py b/pyatoa/visuals/map_maker.py
index af95a7c8..37313a7a 100644
--- a/pyatoa/visuals/map_maker.py
+++ b/pyatoa/visuals/map_maker.py
@@ -45,8 +45,8 @@ def __init__(self, cat, inv, dpi=100, figsize=None, figure=None,
         # Set up a few useful parameters that will be called repeatedly
         self.ev_lat = self.event.preferred_origin().latitude
         self.ev_lon = self.event.preferred_origin().longitude
-        self.sta_lat = inv[0][0][0].latitude
-        self.sta_lon = inv[0][0][0].longitude
+        self.sta_lat = inv[0][0].latitude
+        self.sta_lon = inv[0][0].longitude
 
         # Used for coordinate transforms between lat/lon and projection
         self.ref_proj = ccrs.PlateCarree()
@@ -61,7 +61,6 @@ def __init__(self, cat, inv, dpi=100, figsize=None, figure=None,
             figsize, dpi, figure, gridspec
         )
 
-
     def define_bounding_box(self, corners=None, corner_buffer_deg=2):
         """
         Distribute the corners provided by the user, or determine corners
@@ -317,16 +316,20 @@ def annotate(self, location="lower-right", anno_latlon=False):
         gc_dist, baz = gcd_and_baz(self.event, self.inv[0][0])
         origin_time = self.event.origins[0].time
         depth = self.event.preferred_origin().depth * 1E-3
-        magnitude = self.event.preferred_magnitude().mag
-        mag_type = self.event.preferred_magnitude().magnitude_type
+        if self.event.preferred_magnitude() is not None:
+            magnitude = self.event.preferred_magnitude().mag
+            mag_type = self.event.preferred_magnitude().magnitude_type
+            mag_str = f"{mag_type} {magnitude:.2f}\n"
+        else:
+            mag_str = "Magnitude info unavailable\n"
+
         region = FlinnEngdahl().get_region(self.ev_lon, self.ev_lat)
 
-        # Need to use plot because basemap object has no annotate method
         self.ax.text(s=(f"{region.title()}\n"
                         f"{'-'*len(region)}\n"
                         f"{event_id} / {sta_id}\n"
                         f"{origin_time.format_iris_web_service()}\n"
-                        f"{mag_type} {magnitude:.2f}\n"
+                        f"{mag_str}"
                         f"Depth: {depth:.2f} km\n"
                         f"Dist: {gc_dist:.2f} km\n"
                         f"BAz: {baz:.2f}{DEGREE_CHAR}\n"
diff --git a/pyatoa/visuals/wave_maker.py b/pyatoa/visuals/wave_maker.py
index 3561ce57..d2ad4b87 100644
--- a/pyatoa/visuals/wave_maker.py
+++ b/pyatoa/visuals/wave_maker.py
@@ -544,9 +544,7 @@ def create_title(self, normalized=False, append_title=None):
         if self.config.step_tag is not None:
             title += self.config.step_tag
 
-        # Add information about the Pyflex and Pyadjoint parameters used
-        if self.kwargs.get("plot_stalta", True):
-            title += f" pyflex={self.config.pyflex_preset}, "
+        # Add information about the Pyadjoint parameters used
         if self.kwargs.get("plot_adjsrc", True):
             title += f" pyadjoint={self.config.adj_src_type}, "
 
diff --git a/pyproject.toml b/pyproject.toml
index 031e829f..70958215 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyatoa"
-version = "0.2.2"
+version = "0.3.0"
 description = "Python's Adjoint Tomography Operations Assistant"
 readme = "README.md"
 requires-python = ">=3.7"
@@ -22,7 +22,6 @@ dependencies = [
     "pypdf",
     "pyflex",
     "pyadjoint",
-    "pysep-adjtomo"
 ]
 
 [project.optional-dependencies]