diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 000000000..efb193b59
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,69 @@
+# Python CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-python/ for more details
+#
+general:
+  branches:
+    ignore:
+      - gh-pages
+
+version: 2
+jobs:
+  build:
+    docker:
+      # specify the version you desire here
+      # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
+      - image: circleci/python:3.6.1
+
+      # Specify service dependencies here if necessary
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      # - image: circleci/postgres:9.4
+
+    working_directory: ~/repo
+
+    steps:
+      - checkout
+      - restore_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}
+
+      - run:
+          name: install dependencies
+          command: |
+            python3 -m venv venv
+            . venv/bin/activate
+            pip install "numpy>=1.12"
+            pip install -r requirements.txt
+      - save_cache:
+          key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }}
+          paths:
+            - "venv"
+      - run:
+          name: install fftw
+          command: |
+            sudo apt-get update
+            sudo apt-get install libfftw3-dev
+      - run:
+          name: install EQcorrscan
+          command: |
+            . venv/bin/activate
+            python setup.py install
+            python setup.py build_ext --inplace
+      # run tests!
+      - run:
+          name: run tests
+          command: |
+            . venv/bin/activate
+            py.test -m "network" -n 2
+            py.test eqcorrscan/doc/tutorials/*.rst eqcorrscan/doc/submodules/*.rst --cov-append
+
+      - run:
+          name: Upload to codecov
+          command: |
+            . venv/bin/activate
+            ls -a
+            codecov
+
+      - store_artifacts:
+          path: test-reports
+          destination: test-reports
diff --git a/.travis.yml b/.travis.yml
index b5104b93b..46b6216b4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,7 @@ matrix:
     env: PYTHON_VERSION=3.6
 
   - os: osx
+    osx_image: xcode8
    env:
      - PYTHON_VERSION=3.5
 
@@ -24,8 +25,9 @@ install:
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
      export OS="MacOSX";
      export py=$PYTHON_VERSION;
-     export CC=gcc-4.9;
-     export CXX=g++-4.9;
+     brew install gcc6;
+     export CC=gcc-6;
+     export CXX=g++-6;
      brew install fftw;
    else
      export OS="Linux";
@@ -57,7 +59,7 @@ install:
  - conda create -q -n test-environment python=$PYTHON_VERSION colorama numpy scipy matplotlib obspy flake8 mock coverage bottleneck pyproj
  - source activate test-environment
  - conda install h5py
-  - pip install pep8-naming pytest pytest-cov pytest-pep8 pytest-xdist codecov Cython
+  - pip install pep8-naming pytest pytest-cov pytest-pep8 pytest-xdist pytest-rerunfailures codecov Cython
  - pip freeze
  - conda list
# done installing dependencies
@@ -66,7 +68,9 @@ install:
  - python setup.py build_ext --inplace
 
script:
-  - py.test --runslow
+  - py.test --runslow -m "not serial and not network" -n 2
+  - py.test -m "serial and not network" --cov-append
+#  - py.test -m "network" --cov-append -n 2
 
after_success:
  # Check how much code is actually tested and send this report to codecov
diff --git a/CHANGES.md b/CHANGES.md
index 07bc02a1a..4ba33bc2e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,38 @@
+## Current
+* Added the ability to change the correlation functions used in detection
+  methods through the parameter xcorr_func of match_filter, Template.detect
+  and Tribe.detect, or using the set_xcorr context manager in
+  the utils.correlate module. Supported options are:
+  * numpy
+  * fftw
+  * time-domain
+  * or passing a function that implements the xcorr interface (see the
+    sketch below).
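+  As a minimal sketch (hypothetical names: `tribe` is an existing Tribe,
+  `st` an ObsPy Stream, and the threshold values are illustrative only):
+  ```python
+  from eqcorrscan.utils.correlate import set_xcorr
+
+  # use the numpy backend for everything run inside the context
+  with set_xcorr('numpy'):
+      party = tribe.detect(stream=st, threshold=8.0, threshold_type='MAD',
+                           trig_int=6.0, plotvar=False)
+
+  # or select the backend per-call via the new parameter
+  party = tribe.detect(stream=st, threshold=8.0, threshold_type='MAD',
+                       trig_int=6.0, plotvar=False, xcorr_func='time-domain')
+  ```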
+* Added the ability to change the concurrency strategy of xcorr functions
+  using the parameter concurrency of match_filter, Template.detect
+  and Tribe.detect. Supported options are:
+  * None - for single-threaded execution in a single process
+  * multithread - for multi-threaded execution
+  * multiprocess - for multiprocess execution
+  * concurrent - allows functions to describe their own preferred concurrency
+    methods, defaults to multithread
+* Change debug printing output, so it should be a little quieter;
+* Speed-up time-domain correlation using a threaded C-routine - separate from
+  the frequency-domain C-routines;
+* Expose useful parallel options for all correlation routines;
+* Expose cores argument for match-filter objects to allow limits to be placed
+  on how much of your machine is used;
+* Limit number of workers created during pre-processing to never be more than
+  the number of traces in the stream being processed;
+* Implement OpenMP parallelisation of cross-correlation sum routines - memory
+  consumption reduced by using shared memory, and by computing the
+  cross-correlation sums rather than individual channel cross-correlations.
+  This also leads to a speed-up. This routine is the default concurrent
+  correlation routine;
+* Test examples in rst doc files to ensure they are up-to-date;
+* Tests that were prone to timeout issues have been migrated to run on
+  CircleCI to allow quick re-starting of failures not due to code errors
+
+
 ## 0.2.5
 * Fix bug with \_group_process that resulted in stalled processes.
 * Force NumPy version
@@ -17,19 +52,19 @@ template;
 option (previously only used for debugging in development);
 * Increase test coverage in lag_calc;
 * Speed-up tests for brightness;
-* Increase test coverage for match_filter including testing io of
+* Increase test coverage for match_filter including testing io of
 detections;
 * Increase subspace test coverage for edge cases;
 * Speed-up catalog_to_dd_tests;
 * Lag-calc will pick S-picks on channels ending E, N, 1 and 2, change
 from only picking on E and N before; warning added to docs;
 * Add full tests for pre-processing;
-* Run tests in parallel on ci, speed-up tests dramatically;
-* Rename singular-value decomposition functions (with depreciation
+* Run tests in parallel on ci, speed-up tests dramatically;
+* Rename singular-value decomposition functions (with deprecation
 warnings);
 * Rename SVD_moments to lower-case and add depreciation warning;
 * Increase test coverage in utils.mag_calc;
-* Add Template, Tribe, Family, Party objects and rename DETECTION to
+* Add Template, Tribe, Family, Party objects and rename DETECTION to
 Detection;
 * Template objects maintain meta-data associated with their creation
 to stream-line processing of data (e.g. reduce chance of using the
@@ -45,10 +80,10 @@ Detection;
  * The Party object is a container for many Family objects.
  * Family objects are containers for detections from the same
  Template.
- * Family and Party objects have a lag_calc method which computes
+ * Family and Party objects have a lag_calc method which computes
 the cross-correlation pick-refinements.
* The upshot of this is that it is possible to, in one line, - generate a Tribe of templates, compute their matched-filter + generate a Tribe of templates, compute their matched-filter detections, and generate cross-correlation pick refinements, which output Event objects, which can be written to a catalog: Tribe.construct(method, **kwargs).detect(st, **kwargs).lag_calc(**kwargs).write() @@ -81,33 +116,33 @@ fewer traces than template; ## 0.1.6 * Fix bug introduced in version 0.1.5 for match_filter where looping -through multiple templates did not correctly match image and template +through multiple templates did not correctly match image and template data: 0.1.5 fix did not work; * Bug-fix in catalog_to_dd for events without magnitudes; * Amend match-filter to not edit the list of template names in place. -Previously, if a template was not used (due to no matching continuous -data) then the name of the template was removed: this now copies the +Previously, if a template was not used (due to no matching continuous +data) then the name of the template was removed: this now copies the list of template_names internally and does not change the external list. ## 0.1.5 * Migrate coverage to codecov; * Fix bug introduced in version 0.1.5 for match_filter where looping -through multiple templates did not correctly match image and template +through multiple templates did not correctly match image and template data. ## 0.1.4 * Bug-fix in plot_repicked removed where data were not normalized properly; -* Bug-fix in lag_calc where data were missing in the continuous data +* Bug-fix in lag_calc where data were missing in the continuous data fixed (this led to incorrect picks, **major bug!**); * Output cross-channel correlation sum in lag-calc output; * Add id to DETECTION objects, which is consistent with the events within DETECTION objects and catalog output, and used in lag_calc to allow linking of detections to catalog events; -* Add lots of logging and error messages to lag-calc to ensure user +* Add lots of logging and error messages to lag-calc to ensure user understands limits; * Add error to day-proc to ensure user is aware of risks of padding; -* Change utils.pre_processing.process to accept different length of +* Change utils.pre_processing.process to accept different length of data enforcement, not just full day (allow for overlap in processing, which might be useful for reducing day start and end effects); * Bug-fix in mag_calc.amp_pick_event, broke loop if data were missing; diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index aa6755f67..663f8df42 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,5 +1,7 @@ -Calum John Chamberlain -Chet Hopp -Emily Warren-Smith -Konstantinos Michailos -Shanna Chu +* Calum John Chamberlain +* Chet Hopp +* Emily Warren-Smith +* Konstantinos Michailos +* Shanna Chu +* Derrick Chambers +* Chris Scott diff --git a/MANIFEST.in b/MANIFEST.in index f2946ad13..17bdee047 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include README.md include LICENCE.txt include CHANGES.md include CONTRIBUTORS.md +include requirements.txt # include eqcorrscan/test_data/tutorial_data.tgz # include eqcorrscan/tutorial.py recursive-include eqcorrscan/utils *.py @@ -16,6 +17,7 @@ recursive-include eqcorrscan/core *.py recursive-include eqcorrscan/tests *.py include eqcorrscan/lib/libutils.def include eqcorrscan/lib/multi_corr.c +include eqcorrscan/lib/time_corr.c # exclude rules # global-exclude *.pyc *.ms *.tgz diff --git a/README.md b/README.md index 
d824bd196..c082d426c 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,26 @@
 # EQcorrscan
 ## A python package for the detection and analysis of repeating and near-repeating earthquakes.
 
-[![Join the chat at https://gitter.im/eqcorrscan/EQcorrscan](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/eqcorrscan/EQcorrscan?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-[![TravisCIStatus](https://travis-ci.org/eqcorrscan/EQcorrscan.svg?branch=master)](https://travis-ci.org/eqcorrscan/EQcorrscan)
-[![Build status](https://ci.appveyor.com/api/projects/status/b0924mp0uwwyap3d/branch/master?svg=true)](https://ci.appveyor.com/project/calum-chamberlain/eqcorrscan-jsycv/branch/master)
-[![codecov](https://codecov.io/gh/eqcorrscan/EQcorrscan/branch/master/graph/badge.svg)](https://codecov.io/gh/eqcorrscan/EQcorrscan)
-[![DOI](https://zenodo.org/badge/35918157.svg)](https://zenodo.org/badge/latestdoi/35918157)
-[![DocumentationStatus](http://readthedocs.org/projects/eqcorrscan/badge/?version=latest)](http://eqcorrscan.readthedocs.org/en/latest/?badge=latest)
-[![Dependency Status](https://dependencyci.com/github/eqcorrscan/EQcorrscan/badge)](https://dependencyci.com/github/eqcorrscan/EQcorrscan)
-[![Stories in Ready](https://badge.waffle.io/eqcorrscan/EQcorrscan.png?label=ready&title=Ready)](http://waffle.io/eqcorrscan/EQcorrscan)
+# Citation:
+We have a manuscript in review; if you make use of EQcorrscan, please cite the following paper:
+
+Chamberlain, C. J., Hopp, C. J., Boese, C. M., Warren-Smith, E., Chambers, D., Chu, S. X., Michailos, K., Townend, J., EQcorrscan: Repeating and near-repeating earthquake detection and analysis in Python. Seismological Research Letters *in review*
+
+If you can, please also cite the version number you used: [![DOI](https://zenodo.org/badge/35918157.svg)](https://zenodo.org/badge/latestdoi/35918157)
+
+# Test status
+Note that tests for Travis and AppVeyor are run daily on master as cron jobs, so failures may reflect time-out issues rather than code errors.
+
+| Service tests | Badge |
+|---------------|-------|
+| OSX & Linux | [![TravisCIStatus](https://travis-ci.org/eqcorrscan/EQcorrscan.svg?branch=master)](https://travis-ci.org/eqcorrscan/EQcorrscan)
+| Windows | [![Build status](https://ci.appveyor.com/api/projects/status/b0924mp0uwwyap3d/branch/master?svg=true)](https://ci.appveyor.com/project/calum-chamberlain/eqcorrscan-jsycv/branch/master)
+| Code coverage | [![codecov](https://codecov.io/gh/eqcorrscan/EQcorrscan/branch/master/graph/badge.svg)](https://codecov.io/gh/eqcorrscan/EQcorrscan)
+| Documentation | [![DocumentationStatus](http://readthedocs.org/projects/eqcorrscan/badge/?version=latest)](http://eqcorrscan.readthedocs.org/en/latest/?badge=latest)
+| Dependency status | [![Dependency Status](https://dependencyci.com/github/eqcorrscan/EQcorrscan/badge)](https://dependencyci.com/github/eqcorrscan/EQcorrscan)
+| Network tests | [![CircleCI](https://circleci.com/gh/eqcorrscan/EQcorrscan/tree/master.svg?style=svg)](https://circleci.com/gh/eqcorrscan/EQcorrscan/tree/master)
+| Issues ready | [![Stories in Ready](https://badge.waffle.io/eqcorrscan/EQcorrscan.png?label=ready&title=Ready)](http://waffle.io/eqcorrscan/EQcorrscan)
+| Chat on gitter | [![Join the chat at https://gitter.im/eqcorrscan/EQcorrscan](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/eqcorrscan/EQcorrscan?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 # Installation
 Installation has been tested on both OSX and Linux (Ubuntu), and
@@ -17,6 +29,9 @@ Note that, although we support Windows, EQcorrscan is optimized for
 linux style distributions, and the developers are not extensive Windows
 users.
 
+*OSX with gcc-4.9 from homebrew doesn't appear to compile properly; all other
+ gcc versions seem to work.*
+
 Instructions for installing EQcorrscan and the required dependency, fftw
 are linked from the
 [docs](http://eqcorrscan.readthedocs.io/en/latest/intro.html#installation)
@@ -79,9 +94,7 @@ LGPL GNU License, Copyright EQcorrscan developers 2015, 2016, 2017.
 
 # Contributing
 Please fork this project and work on it there then create a pull request to
-merge back to this main repository. If you are working on a bug-fix then
-use the *develop* branch, otherwise, create a feature branch and work
-on your addition there.
+merge back to this main repository. Please create a branch from *develop*.
 
 When you make changes please run the tests in the test directory to
 ensure everything merges with minimum effort. If there is not yet a test to cope
diff --git a/appveyor.yml b/appveyor.yml
index 921a9b976..7970ddf66 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -54,7 +54,7 @@ install:
 
  # Install other pip dependancies
  - "pip install -U future"
-  - "pip install pytest pytest-pep8 pytest-cov pytest-xdist codecov"
+  - "pip install pytest pytest-pep8 pytest-cov pytest-xdist pytest-rerunfailures codecov"
  # list package versions
  - "conda list"
  # Copy the windows fftw header file
@@ -66,11 +66,13 @@ build: false
test_script:
  - "%CMD_IN_ENV% python setup.py build_ext --inplace"
  - "%CMD_IN_ENV% python setup.py install"
-  - "%CMD_IN_ENV% py.test -n4"
+  - "%CMD_IN_ENV% py.test -n4 -m \"not serial and not network\""
+  - "%CMD_IN_ENV% py.test -m \"serial and not network\" --cov-append"
+#  - "%CMD_IN_ENV% py.test -n4 -m \"network\" --cov-append"
 
after_test:
  # - "coverage combine"
-  - "powershell copy-item .coverage ..\\.coverage.empty"
-  - "cd .."
-  - "coverage combine"
+  # - "powershell copy-item .coverage ..\\.coverage.empty"
+  # - "cd .."
+ # - "coverage combine" - "codecov" diff --git a/conftest.py b/conftest.py index 67fe04dd3..17071e019 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,113 @@ +import os +import shutil +from os.path import join, dirname + +import pytest + +# ------------------------- Paths constants +# these are added to pytest namespace for convinience if finding things +# eg test data + +PKG_PATH = join(dirname(__file__), 'eqcorrscan') +TEST_PATH = join(PKG_PATH, 'tests') +TEST_DATA_PATH = join(TEST_PATH, 'test_data') + + +# --------------- pytest configuration + + +def pytest_addoption(parser): + parser.addoption("--runslow", action="store_true", + help="run slow tests") + + +# ------------------ session fixtures + + +@pytest.fixture(scope='session', autouse=True) +def clean_up_test_files(): + """ cleanup the file droppings produced by doc tests. + This fixture will run after all other tests have finished """ + files_to_kill = [ + 'test_csv_write.csv', + 'test_family.tgz', + 'test_quakeml.ml', + 'test_tar_write.tgz', + 'test_template.tgz', + 'test_template_read.tgz', + 'test_tribe.tgz', + 'test_waveforms.ms', + 'mag_calc.out', + 'station.dat', + 'test_waveform.ms', + '01-0410-35L.S201309', + '04-0007-55R.S201601', + '04-0045-52L.S201601', + 'dt.cc', + 'dt.cc2', + 'dt.ct', + 'dt.ct2', + 'phase.dat' + ] + + yield + + # remove files + for fi in files_to_kill: + if os.path.isfile(fi): + os.remove(fi) + + +@pytest.fixture(scope='session', autouse=True) +def clean_up_test_directories(): + """ cleanup the file droppings produced by doc tests. + This fixture will run after all other tests have finished """ + directories_to_kill = [ + 'temp1', + 'temp2', + 'test_family', + 'test_party_out', + 'test_tar_write', + 'tmp1', + ] + + yield + + # remove files + for directory in directories_to_kill: + if os.path.isdir(directory): + shutil.rmtree(directory) + + +# ------------- add objects to global pytest scope + + +def append_name(list_like): + """ + Decorator to append a function name to an object with an append method. + + :param list_like: Any object with append method + :return: func + """ + + def _append_func_name(func): + list_like.append(func.__name__) + return func + + return _append_func_name + + +# add key variables to the pytest name space. All these can now be accessed +# within any test by using getattr notation on the pytest package itself +# eg pytest.test_path is bound to TEST_PATH +def pytest_namespace(): + odict = {'test_path': TEST_PATH, + 'test_data_path': TEST_DATA_PATH, + 'pkg_path': PKG_PATH, + 'append_name': append_name, + } + return odict + # Over-ride the -n auto in travis as this goes of the wall # import sys # import os @@ -11,8 +121,3 @@ # else: # args[:] = args + ["-n2"] # print(args) - - -def pytest_addoption(parser): - parser.addoption("--runslow", action="store_true", - help="run slow tests") diff --git a/eqcorrscan/__init__.py b/eqcorrscan/__init__.py index 4093ca7c3..41ab960f6 100755 --- a/eqcorrscan/__init__.py +++ b/eqcorrscan/__init__.py @@ -7,15 +7,13 @@ GNU Lesser General Public License, Version 3 (https://www.gnu.org/copyleft/lesser.html) """ -import sys import importlib +import sys import warnings __all__ = ['core', 'utils', 'tutorials'] - -__version__ = '0.2.5' - +__version__ = '0.2.6' # Cope with changes to name-space to remove most of the camel-case _import_map = { @@ -35,6 +33,7 @@ class EQcorrscanRestructureAndLoad(object): """ Path finder and module loader for transitioning """ + def find_module(self, fullname, path=None): # Compatibility with namespace paths. 
if hasattr(path, "_path"): @@ -87,4 +86,5 @@ def load_module(self, name): if __name__ == '__main__': import doctest + doctest.testmod(exclude_empty=True) diff --git a/eqcorrscan/core/bright_lights.py b/eqcorrscan/core/bright_lights.py index b10b77d01..f13a17551 100644 --- a/eqcorrscan/core/bright_lights.py +++ b/eqcorrscan/core/bright_lights.py @@ -631,6 +631,7 @@ def brightness(stations, nodes, lags, stream, threshold, thresh_type, import matplotlib.pyplot as plt plt.ioff() from eqcorrscan.utils import plotting + from eqcorrscan.utils.debug_log import debug_print # Check that we actually have the correct stations realstations = [] for station in stations: @@ -750,9 +751,8 @@ def brightness(stations, nodes, lags, stream, threshold, thresh_type, # Generate a catalog of detections # detections_cat = Catalog() for j, detection in enumerate(detections): - if debug > 3: - print('Converting for detection ' + str(j) + ' of ' + - str(len(detections))) + debug_print('Converting for detection %i of %i' + % (j, len(detections)), 3, debug) # Create an event for each detection event = Event() # Set up some header info for the event @@ -791,8 +791,7 @@ def brightness(stations, nodes, lags, stream, threshold, thresh_type, time=tr.stats.starttime + detect_lag + detection.detect_time + pre_pick, onset='emergent', evalutation_mode='automatic')) - if debug > 0: - print('Generating template for detection: ' + str(j)) + debug_print('Generating template for detection: %i' % j, 0, debug) template = template_gen( picks=event.picks, st=copy_of_stream, length=template_length, swin='all') @@ -811,17 +810,15 @@ def brightness(stations, nodes, lags, stream, threshold, thresh_type, print('---------------------------------coherence LEVEL: ' + str(temp_coher)) coherent = True - elif debug > 0: - print('Template was incoherent, coherence level: ' + - str(temp_coher)) + debug_print('Template was incoherent, coherence level: ' + + str(temp_coher), 0, debug) coherent = False del copy_of_stream, tr, template if coherent: templates.append(obsread(template_name)) nodesout += [node] good_detections.append(detection) - elif debug > 0: - print('No template for you') + debug_print('No template for you', 0, debug) # detections_cat += event if plotvar: good_detections = [(cum_net_trace[-1].stats.starttime + diff --git a/eqcorrscan/core/lag_calc.py b/eqcorrscan/core/lag_calc.py index d242a7af0..473045eee 100644 --- a/eqcorrscan/core/lag_calc.py +++ b/eqcorrscan/core/lag_calc.py @@ -26,6 +26,7 @@ from obspy.core.event import ResourceIdentifier, Comment from eqcorrscan.utils.plotting import plot_repicked, detection_multiplot +from eqcorrscan.utils.debug_log import debug_print class LagCalcError(Exception): @@ -158,8 +159,8 @@ def _channel_loop(detection, template, min_cc, detection_id, interpolate, i, temp_net = tr.stats.network temp_sta = tr.stats.station temp_chan = tr.stats.channel - if debug > 3: - print('Working on: %s.%s.%s' % (temp_net, temp_sta, temp_chan)) + debug_print('Working on: %s.%s.%s' % (temp_net, temp_sta, temp_chan), + 3, debug) image = detection.select(station=temp_sta, channel=temp_chan) if len(image) == 0: print('No match in image.') @@ -193,13 +194,11 @@ def _channel_loop(detection, template, min_cc, detection_id, interpolate, i, cc_max = np.amax(ccc) picktime = image[0].stats.starttime + ( np.argmax(ccc) * image[0].stats.delta) - if debug > 3: - print('Maximum cross-corr=%s' % cc_max) + debug_print('Maximum cross-corr=%s' % cc_max, 3, debug) checksum += cc_max used_chans += 1 if cc_max < min_cc: - if debug > 3: - 
print('Correlation below threshold, not used')
+            debug_print('Correlation below threshold, not used', 3, debug)
             continue
         cccsum += cc_max
         # Perhaps weight each pick by the cc val or cc val^2?
@@ -209,9 +208,8 @@ def _channel_loop(detection, template, min_cc, detection_id, interpolate, i,
         # Only take the S-pick with the best correlation
         elif temp_chan[-1] in horizontal_chans:
             phase = 'S'
-            if debug > 4:
-                print('Making S-pick on: %s.%s.%s' %
-                      (temp_net, temp_sta, temp_chan))
+            debug_print('Making S-pick on: %s.%s.%s' %
+                        (temp_net, temp_sta, temp_chan), 4, debug)
             if temp_sta not in s_stachans.keys():
                 s_stachans[temp_sta] = ((temp_chan, np.amax(ccc),
                                          picktime))
@@ -298,8 +296,7 @@ def _day_loop(detection_streams, template, min_cc, detections,
         num_cores = len(detection_streams)
     if parallel:
         pool = Pool(processes=num_cores)
-        if debug > 4:
-            print('Made pool of %i workers' % num_cores)
+        debug_print('Made pool of %i workers' % num_cores, 4, debug)
         # Parallel generation of events for each detection:
         # results will be a list of (i, event class)
         results = [pool.apply_async(
@@ -577,8 +574,8 @@ def lag_calc(detections, detect_data, template_names, templates,
         template_detections = [detection for detection in detections
                                if detection.template_name == template[0]]
         t_delays = [d for d in delays if d[0] == template[0]][0][1]
-        if debug > 2:
-            print('There are %i detections' % len(template_detections))
+        debug_print(
+            'There are %i detections' % len(template_detections), 2, debug)
         detect_streams = _prepare_data(
             detect_data=detect_data, detections=template_detections,
             template=template, delays=t_delays, shift_len=shift_len,
diff --git a/eqcorrscan/core/match_filter.py b/eqcorrscan/core/match_filter.py
index 0e332654c..0271fbf2f 100644
--- a/eqcorrscan/core/match_filter.py
+++ b/eqcorrscan/core/match_filter.py
@@ -18,32 +18,43 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
-import numpy as np
-
-import warnings
 import ast
+import contextlib
 import copy
 import getpass
+import glob
+import os
 import re
-import tarfile
 import shutil
+import tarfile
 import tempfile
-import glob
-
-from multiprocessing import cpu_count
+import time
+import warnings
 from collections import Counter
+from os.path import join
+
+import numpy as np
 
 from obspy import Trace, Catalog, UTCDateTime, Stream, read, read_events
-from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
 from obspy.core.event import Comment, WaveformStreamID
+from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
+
+from eqcorrscan.core import template_gen
+from eqcorrscan.core.lag_calc import lag_calc
+from eqcorrscan.utils.catalog_utils import _get_origin
+from eqcorrscan.utils.correlate import get_array_xcorr, get_stream_xcorr
+from eqcorrscan.utils.debug_log import debug_print
 from eqcorrscan.utils.findpeaks import find_peaks2_short, decluster
 from eqcorrscan.utils.plotting import cumulative_detections
 from eqcorrscan.utils.pre_processing import dayproc, shortproc
-from eqcorrscan.utils.catalog_utils import _get_origin
-from eqcorrscan.core import template_gen
-from eqcorrscan.core.lag_calc import lag_calc
+
+
+@contextlib.contextmanager
+def temporary_directory():
+    """ Make a temporary directory, yield its name, and clean up on exit. """
+    dir_name = tempfile.mkdtemp()
+    yield dir_name
+    if os.path.exists(dir_name):
+        shutil.rmtree(dir_name)
 
 
 def _spike_test(stream, percent=0.99, multiplier=1e6):
@@ -75,6 +86,7 @@ class MatchFilterError(Exception):
     """
     Default error
for match-filter errors. """ + def __init__(self, value): """ Raise error. @@ -111,6 +123,7 @@ class Party(object): """ Container for multiple Family objects. """ + def __init__(self, families=None): """Instantiate the Party object.""" self.families = [] @@ -511,7 +524,7 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor'): detect_info = [(d[1], _total_microsec(d[0].datetime, min_det.datetime)) for d in detect_info] peaks_out, inds_out = decluster( - peaks=detect_info, trig_int=trig_int * 10**6, return_ind=True) + peaks=detect_info, trig_int=trig_int * 10 ** 6, return_ind=True) # Trig_int must be converted from seconds to micro-seconds declustered_detections = [all_detections[ind] for ind in inds_out] # Convert this list into families @@ -591,26 +604,26 @@ def write(self, filename, format='tar'): for detection in family.detections: detection.write(fname=filename, append=True) elif format.lower() == 'tar': - if os.path.isdir(filename) or os.path.isfile(filename): + if os.path.exists(filename): raise IOError('Will not overwrite existing file: %s' % filename) - os.makedirs(filename) - Tribe([f.template for f in self.families]).write( - filename=filename, compress=False) - all_cat = Catalog() - for family in self.families: - all_cat += family.catalog - if not len(all_cat) == 0: - all_cat.write(filename + os.sep + 'catalog.xml', - format='QUAKEML') - for i, family in enumerate(self.families): - print('Writing family %i' % i) - _write_family( - family=family, filename=filename + os.sep + - family.template.name + '_detections.csv') - with tarfile.open(filename + '.tgz', "w:gz") as tar: - tar.add(filename, arcname=os.path.basename(filename)) - shutil.rmtree(filename) + # os.makedirs(filename) + with temporary_directory() as temp_dir: + Tribe([f.template for f in self.families]).write( + filename=temp_dir, compress=False) + all_cat = Catalog() + for family in self.families: + all_cat += family.catalog + if not len(all_cat) == 0: + all_cat.write(join(temp_dir, 'catalog.xml'), + format='QUAKEML') + for i, family in enumerate(self.families): + print('Writing family %i' % i) + name = family.template.name + '_detections.csv' + name_to_write = join(temp_dir, name) + _write_family(family=family, filename=name_to_write) + with tarfile.open(filename + '.tgz', "w:gz") as tar: + tar.add(temp_dir, arcname=os.path.basename(filename)) else: warnings.warn('Writing only the catalog component, metadata ' 'will not be preserved') @@ -646,10 +659,8 @@ def read(self, filename=None): all_cat = read_events(party_dir + os.sep + 'catalog.xml') else: all_cat = Catalog() - for family_file in glob.glob(party_dir + os.sep + '*_detections.csv'): - template = [t for t in tribe - if t.name == family_file.split(os.sep)[-1]. 
- split('_detections.csv')[0]] + for family_file in glob.glob(join(party_dir, '*_detections.csv')): + template = [t for t in tribe if _templates_match(t, family_file)] if len(template) == 0: raise MatchFilterError( 'Missing template for detection file: ' + family_file) @@ -754,7 +765,7 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, new_group = [master.template.copy()] new_det_group = copy.deepcopy(master.detections) for slave in self.families: - if master.template.same_processing(slave.template) and\ + if master.template.same_processing(slave.template) and \ master.template != slave.template: slave_chans = [ (tr.stats.station, @@ -799,7 +810,7 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, temp_cat = lag_calc( detections=det_group, detect_data=processed_stream, template_names=[t.name for t in group], - templates=[t.st for t in group], shift_len=shift_len, + templates=[t.st for t in group], shift_len=shift_len, min_cc=min_cc, horizontal_chans=horizontal_chans, vertical_chans=vertical_chans, cores=cores, interpolate=interpolate, plot=plot, parallel=parallel, @@ -874,6 +885,7 @@ class Family(object): :param catalog: Catalog of detections, with information for the individual detections. """ + def __init__(self, template, detections=None, catalog=None): """Instantiation of Family object.""" self.template = template @@ -1397,6 +1409,7 @@ class Template(object): Template object holder. Contains waveform data and metadata parameters used to generate the template. """ + def __init__(self, name=None, st=None, lowcut=None, highcut=None, samp_rate=None, filt_order=None, process_length=None, prepick=None, event=None): @@ -1423,11 +1436,11 @@ def __init__(self, name=None, st=None, lowcut=None, highcut=None, self.prepick = prepick if event is not None: if "eqcorrscan_template_" + temp_name not in \ - [c.text for c in event.comments]: + [c.text for c in event.comments]: event.comments.append(Comment( - text="eqcorrscan_template_" + temp_name, - creation_info=CreationInfo(agency='eqcorrscan', - author=getpass.getuser()))) + text="eqcorrscan_template_" + temp_name, + creation_info=CreationInfo(agency='eqcorrscan', + author=getpass.getuser()))) self.event = event def __repr__(self): @@ -1516,8 +1529,9 @@ def __eq__(self, other): """ for key in self.__dict__.keys(): if key == 'st': - if isinstance(self.st, Stream) and \ - isinstance(other.st, Stream): + self_is_stream = isinstance(self.st, Stream) + other_is_stream = isinstance(other.st, Stream) + if self_is_stream and other_is_stream: for tr, oth_tr in zip(self.st.sort(), other.st.sort()): if not np.array_equal(tr.data, oth_tr.data): @@ -1528,24 +1542,20 @@ def __eq__(self, other): 'calib']: if tr.stats[trkey] != oth_tr.stats[trkey]: return False - elif isinstance( - self.st, Stream) and not isinstance(other.st, Stream): + elif self_is_stream and not other_is_stream: return False - elif not isinstance( - self.st, Stream) and isinstance(other.st, Stream): + elif not self_is_stream and other_is_stream: return False elif key == 'event': - if isinstance( - self.event, Event) and isinstance(other.event, Event): + self_is_event = isinstance(self.event, Event) + other_is_event = isinstance(other.event, Event) + if self_is_event and other_is_event: if not _test_event_similarity( self.event, other.event, verbose=False): return False - elif isinstance( - self.event, Event) and not isinstance( - other.event, Event): + elif self_is_event and not other_is_event: return False - elif not isinstance( - self.event, Event) and 
isinstance(other.event, Event): + elif not self_is_event and other_is_event: return False elif not self.__dict__[key] == other.__dict__[key]: return False @@ -1683,6 +1693,7 @@ def read(self, filename): def detect(self, stream, threshold, threshold_type, trig_int, plotvar, pre_processed=False, daylong=False, parallel_process=True, + xcorr_func=None, concurrency=None, cores=None, ignore_length=False, overlap="calculate", debug=0): """ Detect using a single template within a continuous stream. @@ -1720,6 +1731,18 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar, over other methods. :type parallel_process: bool :param parallel_process: + :type xcorr_func: str or callable + :param xcorr_func: + A str of a registered xcorr function or a callable for implementing + a custom xcorr function. For more details see + :func:`eqcorrscan.utils.correlate.register_array_xcorr`. + :type concurrency: str + :param concurrency: + The type of concurrency to apply to the xcorr function. Options are + 'multithread', 'multiprocess', 'concurrent'. For more details see + :func:`eqcorrscan.utils.correlate.get_stream_xcorr`. + :type cores: int + :param cores: Number of workers for processing and detection. :type ignore_length: bool :param ignore_length: If using daylong=True, then dayproc will try check that the data @@ -1798,11 +1821,12 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar, See tutorials for example. """ party = _group_detect( - templates=[self], stream=stream.copy(), threshold=threshold, - threshold_type=threshold_type, trig_int=trig_int, - plotvar=plotvar, pre_processed=pre_processed, daylong=daylong, - parallel_process=parallel_process, - ignore_length=ignore_length, overlap=overlap, debug=debug) + templates=[self], stream=stream.copy(), threshold=threshold, + threshold_type=threshold_type, trig_int=trig_int, + plotvar=plotvar, pre_processed=pre_processed, daylong=daylong, + parallel_process=parallel_process, xcorr_func=xcorr_func, + concurrency=concurrency, cores=cores, ignore_length=ignore_length, + overlap=overlap, debug=debug) return party[0] def construct(self, method, name, lowcut, highcut, samp_rate, filt_order, @@ -1897,6 +1921,7 @@ def construct(self, method, name, lowcut, highcut, samp_rate, filt_order, class Tribe(object): """Holder for multiple templates.""" + def __init__(self, templates=None): self.templates = [] if isinstance(templates, Template): @@ -2130,8 +2155,8 @@ def _par_write(self, dirname): :type dirname: str :param dirname: Directory to write the parameter file to. """ - with open(dirname + '/' + - 'template_parameters.csv', 'w') as parfile: + filename = dirname + '/' + 'template_parameters.csv' + with open(filename, 'w') as parfile: for template in self.templates: for key in template.__dict__.keys(): if key not in ['st', 'event']: @@ -2224,7 +2249,8 @@ def cluster(self, method, **kwargs): return tribes def detect(self, stream, threshold, threshold_type, trig_int, plotvar, - daylong=False, parallel_process=True, ignore_length=False, + daylong=False, parallel_process=True, xcorr_func=None, + concurrency=None, cores=None, ignore_length=False, group_size=None, overlap="calculate", debug=0): """ Detect using a Tribe of templates within a continuous stream. @@ -2255,6 +2281,18 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar, over other methods. 
        :type parallel_process: bool
         :param parallel_process:
+        :type xcorr_func: str or callable
+        :param xcorr_func:
+            A str of a registered xcorr function or a callable for implementing
+            a custom xcorr function. For more information see:
+            :func:`eqcorrscan.utils.correlate.register_array_xcorr`
+        :type concurrency: str
+        :param concurrency:
+            The type of concurrency to apply to the xcorr function. Options are
+            'multithread', 'multiprocess', 'concurrent'. For more details see
+            :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
+        :type cores: int
+        :param cores: Number of workers for processing and detection.
         :type ignore_length: bool
         :param ignore_length:
             If using daylong=True, then dayproc will try check that the data
@@ -2284,6 +2322,10 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar,
 
         .. Note:: `stream` must not be pre-processed.
 
+        .. warning::
+            Picks included in the output Party.get_catalog() will not be
+            corrected for pre-picks in the template.
+
         .. warning::
             Plotting within the match-filter routine uses the Agg backend
             with interactive plotting turned off. This is because the function
@@ -2371,6 +2413,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar,
                 threshold_type=threshold_type, trig_int=trig_int,
                 plotvar=plotvar, group_size=group_size, pre_processed=False,
                 daylong=daylong, parallel_process=parallel_process,
+                xcorr_func=xcorr_func, concurrency=concurrency, cores=cores,
                 ignore_length=ignore_length, overlap=overlap, debug=debug)
             party += group_party
         for family in party:
@@ -2381,7 +2424,8 @@ def detect(self, stream, threshold, threshold_type, trig_int, plotvar,
 
     def client_detect(self, client, starttime, endtime, threshold,
                       threshold_type, trig_int, plotvar, daylong=False,
-                      parallel_process=True, ignore_length=False,
+                      parallel_process=True, xcorr_func=None,
+                      concurrency=None, cores=None, ignore_length=False,
                       group_size=None, debug=0, return_stream=False):
         """
         Detect using a Tribe of templates within a continuous stream.
@@ -2416,6 +2460,18 @@ def client_detect(self, client, starttime, endtime, threshold,
             over other methods.
         :type parallel_process: bool
         :param parallel_process:
+        :type xcorr_func: str or callable
+        :param xcorr_func:
+            A str of a registered xcorr function or a callable for implementing
+            a custom xcorr function. For more information see:
+            :func:`eqcorrscan.utils.correlate.register_array_xcorr`
+        :type concurrency: str
+        :param concurrency:
+            The type of concurrency to apply to the xcorr function. Options are
+            'multithread', 'multiprocess', 'concurrent'. For more details see
+            :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
+        :type cores: int
+        :param cores: Number of workers for processing and detection.
         :type ignore_length: bool
         :param ignore_length:
             If using daylong=True, then dayproc will try check that the data
@@ -2439,6 +2495,10 @@ def client_detect(self, client, starttime, endtime, threshold,
             :class:`eqcorrscan.core.match_filter.Party` of Families of
             detections.
 
+        .. warning::
+            Picks included in the output Party.get_catalog() will not be
+            corrected for pre-picks in the template.
+
         ..
Note:: Ensures that data overlap between loops, which will lead to no missed detections at data start-stop points (see note for @@ -2498,25 +2558,25 @@ def client_detect(self, client, starttime, endtime, threshold, for template in self.templates: for tr in template.st: if tr.stats.network not in [None, '']: - chan_id = (tr.stats.network, ) + chan_id = (tr.stats.network,) else: - chan_id = ('*', ) + chan_id = ('*',) if tr.stats.station not in [None, '']: - chan_id += (tr.stats.station, ) + chan_id += (tr.stats.station,) else: - chan_id += ('*', ) + chan_id += ('*',) if tr.stats.location not in [None, '']: - chan_id += (tr.stats.location, ) + chan_id += (tr.stats.location,) else: - chan_id += ('*', ) + chan_id += ('*',) if tr.stats.channel not in [None, '']: if len(tr.stats.channel) == 2: chan_id += (tr.stats.channel[0] + '?' + - tr.stats.channel[-1], ) + tr.stats.channel[-1],) else: - chan_id += (tr.stats.channel, ) + chan_id += (tr.stats.channel,) else: - chan_id += ('*', ) + chan_id += ('*',) template_channel_ids.append(chan_id) template_channel_ids = list(set(template_channel_ids)) if return_stream: @@ -2541,7 +2601,8 @@ def client_detect(self, client, starttime, endtime, threshold, stream=st, threshold=threshold, threshold_type=threshold_type, trig_int=trig_int, plotvar=plotvar, daylong=daylong, - parallel_process=parallel_process, + parallel_process=parallel_process, xcorr_func=xcorr_func, + concurrency=concurrency, cores=cores, ignore_length=ignore_length, group_size=group_size, overlap=None, debug=debug) if return_stream: @@ -2589,6 +2650,11 @@ def construct(self, method, lowcut, highcut, samp_rate, filt_order, Methods: `from_contbase`, `from_sfile` and `from_sac` are not supported by Tribe.construct and must use Template.construct. + .. Note:: + The Method `multi_template_gen` is not supported because the + processing parameters for the stream are not known. Use + `from_meta_file` instead. + .. Note:: Templates will be named according to their start-time. """ if method in ['from_contbase', 'from_sfile', 'from_sac']: @@ -2611,7 +2677,7 @@ def construct(self, method, lowcut, highcut, samp_rate, filt_order, print('Empty Template') continue t.st = template - t.name = template.sort(['starttime'])[0].\ + t.name = template.sort(['starttime'])[0]. \ stats.starttime.strftime('%Y_%m_%dt%H_%M_%S') t.lowcut = lowcut t.highcut = highcut @@ -2713,19 +2779,16 @@ def __str__(self): def __eq__(self, other): for key in self.__dict__.keys(): + self_is_event = isinstance(self.event, Event) + other_is_event = isinstance(other.event, Event) if key == 'event': - if isinstance(self.event, Event) and \ - isinstance(other.event, Event): + if self_is_event and other_is_event: if not _test_event_similarity( self.event, other.event, verbose=False): return False - elif isinstance( - self.event, Event) and not isinstance( - other.event, Event): + elif self_is_event and not other_is_event: return False - elif not isinstance( - self.event, Event) and isinstance( - other.event, Event): + elif not self_is_event and other_is_event: return False elif self.__dict__[key] != other.__dict__[key]: return False @@ -2814,7 +2877,18 @@ def _total_microsec(t1, t2): -31536000000000 """ td = t1 - t2 - return (td.seconds + td.days * 24 * 3600) * 10**6 + td.microseconds + return (td.seconds + td.days * 24 * 3600) * 10 ** 6 + td.microseconds + + +def _templates_match(t, family_file): + """ + Return True if a tribe matches a family file path. 
+
+    :type t: Template
+    :type family_file: str
+    :return: bool
+    """
+    return t.name == family_file.split(os.sep)[-1].split('_detections.csv')[0]
 
 
 def _test_event_similarity(event_1, event_2, verbose=False):
@@ -2864,11 +2938,11 @@ def _test_event_similarity(event_1, event_2, verbose=False):
                 return False
             if arr_1["distance"] and round(
                     arr_1["distance"]) != round(arr_2["distance"]):
-                    if verbose:
-                        print('%s does not match %s for key %s' %
-                              (arr_1[arr_key], arr_2[arr_key],
-                               arr_key))
-                    return False
+                if verbose:
+                    print('%s does not match %s for key %s' %
+                          (arr_1[arr_key], arr_2[arr_key],
+                           arr_key))
+                return False
     # Check picks
     if len(event_1.picks) != len(event_2.picks):
         if verbose:
             print('Number of picks is not equal')
         return False
@@ -2912,8 +2986,8 @@ def _test_event_similarity(event_1, event_2, verbose=False):
                     if verbose:
                         print('Channel codes do not match')
                     return False
-                if pick_1[key].channel_code[-1] !=\
-                        pick_2[key].channel_code[-1]:
+                if pick_1[key].channel_code[-1] != \
+                        pick_2[key].channel_code[-1]:
                     if verbose:
                         print('Channel codes do not match')
                     return False
@@ -2940,8 +3014,8 @@ def _test_event_similarity(event_1, event_2, verbose=False):
                     if verbose:
                         print('Channel codes do not match')
                     return False
-                if pick_1[key].channel_code[-1] !=\
-                        pick_2[key].channel_code[-1]:
+                if pick_1[key].channel_code[-1] != \
+                        pick_2[key].channel_code[-1]:
                     if verbose:
                         print('Channel codes do not match')
                     return False
@@ -2950,8 +3024,9 @@
 
 def _group_detect(templates, stream, threshold, threshold_type, trig_int,
                   plotvar, group_size=None, pre_processed=False, daylong=False,
-                  parallel_process=True, ignore_length=False,
-                  overlap="calculate", debug=0):
+                  parallel_process=True, xcorr_func=None, concurrency=None,
+                  cores=None, ignore_length=False, overlap="calculate",
+                  debug=0):
     """
     Pre-process and compute detections for a group of templates.
 
@@ -2997,6 +3072,18 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int,
         over other methods.
     :type parallel_process: bool
     :param parallel_process:
+    :type xcorr_func: str or callable
+    :param xcorr_func:
+        A str of a registered xcorr function or a callable for implementing
+        a custom xcorr function. For more details see:
+        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
+    :type concurrency: str
+    :param concurrency:
+        The type of concurrency to apply to the xcorr function. Options are
+        'multithread', 'multiprocess', 'concurrent'. For more details see
+        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
+    :type cores: int
+    :param cores: Number of workers for processing and correlation.
     :type ignore_length: bool
     :param ignore_length:
         If using daylong=True, then dayproc will try check that the data
@@ -3018,10 +3105,6 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int,
     :return:
         :class:`eqcorrscan.core.match_filter.Party` of families of detections.
     """
-    if parallel_process:
-        ncores = cpu_count()
-    else:
-        ncores = None
     master = templates[0]
     # Check that they are all processed the same.
lap = 0.0 @@ -3041,7 +3124,7 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int, if not pre_processed: st = _group_process( template_group=templates, parallel=parallel_process, debug=debug, - cores=False, stream=stream, daylong=daylong, + cores=cores, stream=stream, daylong=daylong, ignore_length=ignore_length, overlap=overlap) else: warnings.warn('Not performing any processing on the ' @@ -3056,9 +3139,9 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int, else: n_groups = 1 for st_chunk in st: - if debug > 0: - print('Computing detections between %s and %s' % - (st_chunk[0].stats.starttime, st_chunk[0].stats.endtime)) + debug_print( + 'Computing detections between %s and %s' % + (st_chunk[0].stats.starttime, st_chunk[0].stats.endtime), 0, debug) st_chunk.trim(starttime=st_chunk[0].stats.starttime, endtime=st_chunk[0].stats.endtime) for tr in st_chunk: @@ -3077,8 +3160,9 @@ def _group_detect(templates, stream, threshold, threshold_type, trig_int, detections += match_filter( template_names=[t.name for t in template_group], template_list=[t.st for t in template_group], st=st_chunk, + xcorr_func=xcorr_func, concurrency=concurrency, threshold=threshold, threshold_type=threshold_type, - trig_int=trig_int, plotvar=plotvar, debug=debug, cores=ncores) + trig_int=trig_int, plotvar=plotvar, debug=debug, cores=cores) for template in template_group: family = Family(template=template, detections=[]) for detection in detections: @@ -3295,10 +3379,9 @@ def _read_family(fname, all_cat): if key == 'event': if len(all_cat) == 0: continue - det_dict.update( - {'event': [e for e in all_cat - if str(e.resource_id). - split('/')[-1] == value][0]}) + el = [e for e in all_cat + if str(e.resource_id).split('/')[-1] == value][0] + det_dict.update({'event': el}) elif key == 'detect_time': det_dict.update( {'detect_time': UTCDateTime(value)}) @@ -3368,7 +3451,7 @@ def read_detections(fname): continue # Skip any repeated headers detection = line.rstrip().split('; ') detection[1] = UTCDateTime(detection[1]) - detection[2] = int(detection[2]) + detection[2] = int(float(detection[2])) detection[3] = ast.literal_eval(detection[3]) detection[4] = float(detection[4]) detection[5] = float(detection[5]) @@ -3490,18 +3573,18 @@ def normxcorr2(template, image): correlation of the image with the template. 
:rtype: numpy.ndarray """ - from eqcorrscan.utils.correlate import fftw_normxcorr + array_xcorr = get_array_xcorr() # Check that we have been passed numpy arrays if type(template) != np.ndarray or type(image) != np.ndarray: print('You have not provided numpy arrays, I will not convert them') return 'NaN' if len(template) > len(image): - ccc = fftw_normxcorr( + ccc = array_xcorr( templates=np.array([image]).astype(np.float32), stream=template.astype(np.float32), pads=[0], threaded=False)[0][0] else: - ccc = fftw_normxcorr( + ccc = array_xcorr( templates=np.array([template]).astype(np.float32), stream=image.astype(np.float32), pads=[0], threaded=False)[0][0] ccc = ccc.reshape((1, len(ccc))) @@ -3509,7 +3592,8 @@ def normxcorr2(template, image): def match_filter(template_names, template_list, st, threshold, - threshold_type, trig_int, plotvar, plotdir='.', cores=None, + threshold_type, trig_int, plotvar, plotdir='.', + xcorr_func=None, concurrency=None, cores=None, debug=0, plot_format='png', output_cat=False, output_event=True, extract_detections=False, arg_check=True): @@ -3550,6 +3634,16 @@ def match_filter(template_names, template_list, st, threshold, :param plotdir: Path to plotting folder, plots will be output here, defaults to run location. + :type xcorr_func: str or callable + :param xcorr_func: + A str of a registered xcorr function or a callable for implementing + a custom xcorr function. For more information see: + :func:`eqcorrscan.utils.correlate.register_array_xcorr` + :type concurrency: str + :param concurrency: + The type of concurrency to apply to the xcorr function. Options are + 'multithread', 'multiprocess', 'concurrent'. For more details see + :func:`eqcorrscan.utils.correlate.get_stream_xcorr` :type cores: int :param cores: Number of cores to use :type debug: int @@ -3654,7 +3748,7 @@ def match_filter(template_names, template_list, st, threshold, length is the number of channels within this template. .. note:: - The output_cat flag will create an :class:`obspy.core.eventCatalog` + The output_cat flag will create an :class:`obspy.core.event.Catalog` containing one event for each :class:`eqcorrscan.core.match_filter.Detection`'s generated by match_filter. Each event will contain a number of comments dealing @@ -3666,15 +3760,40 @@ def match_filter(template_names, template_list, st, threshold, prepick times inherent in each template. For example, if a template trace starts 0.1 seconds before the actual arrival of that phase, then the pick time generated by match_filter for that phase will be - 0.1 seconds early. We are working on a solution that will involve - saving templates alongside associated metadata. + 0.1 seconds early. + .. Note:: xcorr_func can be used as follows: + + .. rubric:: Example + >>> import obspy + >>> import numpy as np + >>> from eqcorrscan.core.match_filter import match_filter + >>> from eqcorrscan.utils.correlate import time_multi_normxcorr + >>> # define a custom xcorr function + >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs): + ... # Just to keep example short call other xcorr function + ... # in practice you would define your own function here + ... print('calling custom xcorr function') + ... return time_multi_normxcorr(templates, stream, pads) + >>> # generate some toy templates and stream + >>> random = np.random.RandomState(42) + >>> template = obspy.read() + >>> stream = obspy.read() + >>> for num, tr in enumerate(stream): # iter stream and embed templates + ... data = tr.data + ... tr.data = random.randn(6000) * 5 + ... 
tr.data[100: 100 + len(data)] = data
+    >>> # call match_filter and ensure the custom function is used
+    >>> detections = match_filter(
+    ...     template_names=['1'], template_list=[template], st=stream,
+    ...     threshold=.5, threshold_type='absolute', trig_int=1, plotvar=False,
+    ...     xcorr_func=custom_normxcorr)  # doctest:+ELLIPSIS
+    calling custom xcorr function...
     """
     _spike_test(st)
     import matplotlib
     matplotlib.use('Agg')
     from eqcorrscan.utils.plotting import _match_filter_plot
-    from eqcorrscan.utils.correlate import multichannel_normxcorr
     if arg_check:
         # Check the arguments to be nice - if arguments wrong type the parallel
         # output for the error won't be useful
@@ -3686,7 +3805,7 @@
             raise MatchFilterError('Not the same number of templates as names')
         for template in template_list:
             if not type(template) == Stream:
-                msg = 'template in template_list must be of type: ' +\
+                msg = 'template in template_list must be of type: ' + \
                       'obspy.core.stream.Stream'
                 raise MatchFilterError(msg)
         if not type(st) == Stream:
@@ -3725,11 +3844,10 @@
         data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
     template_stachan = list(set(template_stachan))
     data_stachan = list(set(data_stachan))
-    if debug >= 3:
-        print('I have template info for these stations:')
-        print(template_stachan)
-        print('I have daylong data for these stations:')
-        print(data_stachan)
+    debug_print('I have template info for these stations:\n' +
+                template_stachan.__str__() +
+                '\nI have daylong data for these stations:\n' +
+                data_stachan.__str__(), 3, debug)
     # Perform a check that the continuous data are all the same length
     min_start_time = min([tr.stats.starttime for tr in stream])
     max_end_time = max([tr.stats.endtime for tr in stream])
@@ -3752,8 +3870,8 @@
                    'not currently supported' % _template_names[i])
             raise MatchFilterError(msg)
     outtic = time.clock()
-    if debug >= 2:
-        print('Ensuring all template channels have matches in continuous data')
+    debug_print('Ensuring all template channels have matches in'
+                ' continuous data', 2, debug)
     template_stachan = {}
     # Work out what station-channel pairs are in the templates, including
    # duplicate station-channel pairs. We will use this information to fill
@@ -3766,12 +3884,11 @@
                  tr.stats.location, tr.stats.channel))
         stachans_in_template = dict(Counter(stachans_in_template))
         for stachan in stachans_in_template.keys():
+            stachans = stachans_in_template[stachan]
             if stachan not in template_stachan.keys():
-                template_stachan.update({stachan:
-                                         stachans_in_template[stachan]})
+                template_stachan.update({stachan: stachans})
             elif stachans_in_template[stachan] > template_stachan[stachan]:
-                template_stachan.update({stachan:
-                                         stachans_in_template[stachan]})
+                template_stachan.update({stachan: stachans})
     # Remove un-matched channels from templates.
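+    # (i.e. drop template station-channel pairs with no matching continuous
+    # data, so template and stream channel sets line up for correlation)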
_template_stachan = copy.deepcopy(template_stachan) for stachan in template_stachan.keys(): @@ -3825,7 +3942,7 @@ def match_filter(template_names, template_list, st, threshold, network=stachan[0], station=stachan[1], location=stachan[2], channel=stachan[3])) if number_of_channels < template_stachan[stachan]: - missed_channels = template_stachan[stachan] -\ + missed_channels = template_stachan[stachan] - \ number_of_channels nulltrace = Trace() nulltrace.stats.update( @@ -3846,25 +3963,22 @@ def match_filter(template_names, template_list, st, threshold, 'lengths, report this error.') templates = _templates _template_names = used_template_names - if debug >= 2: - print('Starting the correlation run for these data') - if debug >= 3: - for template in templates: - print(template) - print(stream) + debug_print('Starting the correlation run for these data', 2, debug) + for template in templates: + debug_print(template.__str__(), 3, debug) + debug_print(stream.__str__(), 3, debug) + multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency) [cccsums, no_chans, chans] = multichannel_normxcorr( templates=templates, stream=stream, cores=cores) if len(cccsums[0]) == 0: raise MatchFilterError('Correlation has not run, zero length cccsum') outtoc = time.clock() - print(' '.join(['Looping over templates and streams took:', - str(outtoc - outtic), 's'])) - if debug >= 2: - print(' '.join(['The shape of the returned cccsums is:', - str(np.shape(cccsums))])) - print(' '.join(['This is from', str(len(templates)), 'templates'])) - print(' '.join(['Correlated with', str(len(stream)), - 'channels of data'])) + debug_print(' '.join(['Looping over templates and streams took:', + str(outtoc - outtic), 's']), 0, debug) + debug_print('The shape of the returned cccsums is: %s\n' + 'This is from %i templates\nCorrelated with %i channels of ' + 'data' % (cccsums.shape, len(templates), len(stream)), 2, + debug) detections = [] if output_cat: det_cat = Catalog() @@ -3877,9 +3991,9 @@ def match_filter(template_names, template_list, st, threshold, elif str(threshold_type) == str('av_chan_corr'): rawthresh = threshold * no_chans[i] # Findpeaks returns a list of tuples in the form [(cccsum, sample)] - print(' '.join(['Threshold is set at:', str(rawthresh)])) - print(' '.join(['Max of data is:', str(max(cccsum))])) - print(' '.join(['Mean of data is:', str(np.mean(cccsum))])) + debug_print("Threshold is set at: %f\nMax of data is %f\nMean of " + "data is %f" % (rawthresh, max(cccsum), np.mean(cccsum)), + 0, debug) if np.abs(np.mean(cccsum)) > 0.05: warnings.warn('Mean is not zero! Check this!') # Set up a trace object for the cccsum as this is easier to plot and @@ -3890,38 +4004,39 @@ def match_filter(template_names, template_list, st, threshold, rawthresh=rawthresh, plotdir=plotdir, plot_format=plot_format, i=i) if debug >= 4: - print(' '.join(['Saved the cccsum to:', _template_names[i], - stream[0].stats.starttime.datetime. 
- strftime('%Y%j')])) np.save(_template_names[i] + stream[0].stats.starttime.datetime.strftime('%Y%j'), cccsum) + debug_print( + ' '.join(['Saved the cccsum to:', _template_names[i], + stream[0].stats.starttime.datetime.strftime('%Y%j')]), + 4, debug) tic = time.clock() if max(cccsum) > rawthresh: peaks = find_peaks2_short( arr=cccsum, thresh=rawthresh, - trig_int=trig_int * stream[0].stats.sampling_rate, debug=debug, + trig_int=trig_int * stream[0].stats.sampling_rate, + debug=debug, starttime=stream[0].stats.starttime, samp_rate=stream[0].stats.sampling_rate) else: - print('No peaks found above threshold') + debug_print('No peaks found above threshold', 0, debug) peaks = False toc = time.clock() - if debug >= 1: - print(' '.join(['Finding peaks took:', str(toc - tic), 's'])) + debug_print('Finding peaks took: %f s' % (toc - tic), 0, debug) if peaks: for peak in peaks: - detecttime = stream[0].stats.starttime +\ - peak[1] / stream[0].stats.sampling_rate + detecttime = stream[0].stats.starttime + \ + peak[1] / stream[0].stats.sampling_rate # Detect time must be valid QuakeML uri within resource_id. # This will write a formatted string which is still # readable by UTCDateTime if not output_event and not output_cat: det_ev = None else: + det_time = str(detecttime.strftime('%Y%m%dT%H%M%S.%f')) ev = Event(resource_id=ResourceIdentifier( - id=_template_names[i] + '_' + - str(detecttime.strftime('%Y%m%dT%H%M%S.%f')), + id=_template_names[i] + '_' + det_time, prefix='smi:local')) ev.creation_info = CreationInfo( author='EQcorrscan', creation_time=UTCDateTime()) @@ -3936,7 +4051,7 @@ def match_filter(template_names, template_list, st, threshold, [tr.stats.starttime for tr in template]) for tr in template: if (tr.stats.station, tr.stats.channel) \ - not in chans[i]: + not in chans[i]: continue else: pick_tm = detecttime + (tr.stats.starttime - @@ -3974,6 +4089,7 @@ def match_filter(template_names, template_list, st, threshold, if __name__ == "__main__": import doctest + doctest.testmod() # List files to be removed after doctest cleanup = ['test_tar_write.tgz', 'test_csv_write.csv', 'test_quakeml.ml', diff --git a/eqcorrscan/core/subspace.py b/eqcorrscan/core/subspace.py index cf52a4d97..44d88fc4d 100644 --- a/eqcorrscan/core/subspace.py +++ b/eqcorrscan/core/subspace.py @@ -32,6 +32,7 @@ WaveformStreamID, Pick from eqcorrscan.utils.clustering import svd +from eqcorrscan.utils.debug_log import debug_print from eqcorrscan.utils import findpeaks, pre_processing, stacking, plotting from eqcorrscan.core.match_filter import Detection, extract_from_stream from eqcorrscan.utils.plotting import subspace_detector_plot, subspace_fc_plot @@ -477,8 +478,7 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, detections = [] # First process the stream if process: - if debug > 0: - print('Processing Stream') + debug_print('Processing Stream', 0, debug) stream, stachans = _subspace_process( streams=[st.copy()], lowcut=detector.lowcut, highcut=detector.highcut, filt_order=detector.filt_order, @@ -500,10 +500,8 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, Nc = 1 # Here do all ffts fft_vars = _do_ffts(detector, stream, Nc) - if debug > 0: - print('Computing detection statistics') - if debug > 0: - print('Preallocating stats matrix') + debug_print('Computing detection statistics', 0, debug) + debug_print('Preallocating stats matrix', 0, debug) stats = np.zeros((len(stream[0]), (len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1)) for det_freq, data_freq_sq, data_freq, i 
in zip(fft_vars[0], fft_vars[1], @@ -512,8 +510,7 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, # Calculate det_statistic in frequency domain stats[i] = _det_stat_freq(det_freq, data_freq_sq, data_freq, fft_vars[3], Nc, fft_vars[4], fft_vars[5]) - if debug >= 1: - print('Stats matrix is shape %s' % str(stats[i].shape)) + debug_print('Stats matrix is shape %s' % str(stats[i].shape), 0, debug) if debug >= 3: fig, ax = plt.subplots() t = np.arange(len(stats[i])) @@ -526,8 +523,7 @@ def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, plt.title('%s' % str(stream[0][i].stats.station)) plt.show() trig_int_samples = detector.sampling_rate * trig_int - if debug > 0: - print('Finding peaks') + debug_print('Finding peaks', 0, debug) peaks = [] for i in range(len(stream[0])): peaks.append(findpeaks.find_peaks2_short( diff --git a/eqcorrscan/core/template_gen.py b/eqcorrscan/core/template_gen.py index bc2bc5a0e..ce25225ad 100644 --- a/eqcorrscan/core/template_gen.py +++ b/eqcorrscan/core/template_gen.py @@ -49,6 +49,7 @@ from obspy import Stream, read, Trace, UTCDateTime, read_events +from eqcorrscan.utils.debug_log import debug_print from eqcorrscan.utils.sac_util import sactoevent from eqcorrscan.utils import pre_processing, sfile_util @@ -276,9 +277,8 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, # Read in waveform file st = Stream() for wavefile in wavefiles: - if debug > 0: - print(''.join(["I am going to read waveform data from: ", wavpath, - wavefile])) + debug_print(''.join(["I am going to read waveform data from: ", + wavpath, wavefile]), 0, debug) if os.path.isfile(wavpath + wavefile): # Read from a given directory st += read(wavpath + wavefile) @@ -551,25 +551,22 @@ def from_meta_file(meta_file, st, lowcut, highcut, samp_rate, filt_order, # Check that the event is within the data for pick in event.picks: if not data_start < pick.time < data_end: - if debug > 0: - print('Pick outside of data span:') - print('Pick time: ' + str(pick.time)) - print('Start time: ' + str(data_start)) - print('End time: ' + str(data_end)) + debug_print("Pick outside of data span:\nPick time %s\n" + "Start time %s\nEnd time: %s" % + (str(pick.time), str(data_start), str(data_end)), + 0, debug) use_event = False if not use_event: warnings.warn('Event is not within data time-span') continue # Read in pick info - if debug > 0: - print("I have found the following picks") + debug_print("I have found the following picks", 0, debug) for pick in event.picks: if not pick.waveform_id: print('Pick not associated with waveforms, will not use.') print(pick) continue - if debug > 0: - print(pick) + debug_print(pick, 0, debug) stations.append(pick.waveform_id.station_code) channels.append(pick.waveform_id.channel_code) # Check to see if all picks have a corresponding waveform @@ -698,11 +695,10 @@ def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order, endtime = starttime + process_len if not endtime > sub_catalog[-1].origins[0].time + data_pad: raise IOError('Events do not fit in processing window') - if debug > 0: - print('start-time: ' + str(starttime)) - print('end-time: ' + str(endtime)) - print('pick-time: ' + str(pick.time)) - print('.'.join([net, sta, loc, chan])) + debug_print('start-time: %s\nend-time: %s\npick-time: %s' % + (str(starttime), str(endtime), str(pick.time)), + 0, debug) + debug_print('.'.join([net, sta, loc, chan]), 0, debug) if sta in client.waveform.get_station_ids(network=net): if 'st' not in locals(): st = 
client.waveform.get_waveform(net, sta, loc, chan, @@ -817,8 +813,6 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, ... filt_order=4, length=3.0, prepick=0.15, ... swin='all', process_len=300, ... all_horiz=True) - BG.CLV..DPZ - BK.BKS.00.HHZ Pre-processing data >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP @@ -860,12 +854,11 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, if not endtime > sub_catalog[-1].origins[0].time + data_pad: raise TemplateGenError('Events do not fit in ' 'processing window') - if debug > 0: - print('start-time: ' + str(starttime)) - print('end-time: ' + str(endtime)) - print('pick-time: ' + str(pick.time)) - print('pick phase: ' + pick.phase_hint) - print('.'.join([net, sta, loc, chan])) + debug_print('start-time: %s\nend-time: %s\npick-time: %s\n' + 'pick phase: %s' % + (str(starttime), str(endtime), str(pick.time), + pick.phase_hint), 0, debug) + debug_print('.'.join([net, sta, loc, chan]), 0, debug) try: st += client.get_waveforms(net, sta, loc, chan, starttime, endtime) @@ -1054,6 +1047,7 @@ def template_gen(picks, st, length, swin='all', prepick=0.05, .. warning:: If there is no phase_hint included in picks, and swin=all, \ all channels with picks will be used. """ + from eqcorrscan.utils.debug_log import debug_print from eqcorrscan.utils.plotting import pretty_template_plot as tplot from eqcorrscan.core.bright_lights import _rms stations = [] @@ -1147,9 +1141,8 @@ def template_gen(picks, st, length, swin='all', prepick=0.05, swin in pick.phase_hint.upper(): starttime = pick.time - prepick if starttime is not None: - if debug > 0: - print("Cutting " + tr.stats.station + '.' + - tr.stats.channel) + debug_print("Cutting " + tr.stats.station + '.' + + tr.stats.channel, 0, debug) if not delayed: starttime = event_start_time tr_cut = tr.copy().trim(starttime=starttime, @@ -1163,9 +1156,9 @@ def template_gen(picks, st, length, swin='all', prepick=0.05, if len(tr_cut.data) == (tr_cut.stats.sampling_rate * length) + 1: tr_cut.data = tr_cut.data[0:-1] - if debug > 0: - print('Cut starttime = ' + str(tr_cut.stats.starttime)) - print('Cut endtime = ' + str(tr_cut.stats.endtime)) + debug_print('Cut starttime = %s\nCut endtime %s' % + (str(tr_cut.stats.starttime), + str(tr_cut.stats.endtime)), 0, debug) if min_snr is not None and \ max(tr_cut.data) / noise_amp < min_snr: print('Signal-to-noise ratio below threshold for %s.%s' % @@ -1173,8 +1166,9 @@ def template_gen(picks, st, length, swin='all', prepick=0.05, continue st1 += tr_cut used_tr = True - if debug > 0 and not used_tr: - print('No pick for ' + tr.stats.station + '.' + tr.stats.channel) + if not used_tr: + debug_print('No pick for ' + tr.stats.station + '.' + + tr.stats.channel, 0, debug) if plot: background = stplot.trim( st1.sort(['starttime'])[0].stats.starttime - 10, diff --git a/eqcorrscan/doc/intro.rst b/eqcorrscan/doc/intro.rst index d62575568..117cbb746 100644 --- a/eqcorrscan/doc/intro.rst +++ b/eqcorrscan/doc/intro.rst @@ -30,7 +30,7 @@ get involved the best place to start, and the most valuable thing for your understanding, and for the health of this package would be to contribute tests and documentation. 
-Installation - Updated for version 0.2.0 +Installation - Updated for version 0.2.x ---------------------------------------- In general we recommend users to install EQcorrscan in a virtual environment, @@ -42,6 +42,9 @@ environment with the following: conda create -n eqcorrscan colorama numpy scipy matplotlib obspy bottleneck pyproj source activate eqcorrscan +Non-Python dependencies: Ubuntu: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Prior to installing the python routines you will need to install the fftw library. On linux use apt (or your default package manager - note you may need sudo access): @@ -50,6 +53,9 @@ sudo access): apt-get install libfftw3-dev +Non-Python dependencies: OSX: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For OS-X systems install fftw from conda, available via the menpo channel: .. code-block:: bash @@ -62,6 +68,13 @@ not clang. To do this you should also install gcc into your conda environment: .. code-block:: bash conda install gcc + +Note that you can install fftw and gcc from other sources; however, we know there +is an issue with homebrew gcc-4.9 (but not with macports gcc-4.9) - other +gcc versions tested do work. + +Non-Python dependencies: Windows: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For Windows systems you should follow the instructions on the |fftw-windows| page and use the pre-compiled dynamic libraries. These should be installed @@ -72,6 +85,9 @@ used), as such, by default, the correlation routines are compiled as serial workflows on windows. If you have a need for this threading in windows please get in touch with the developers. +Final EQcorrscan install: +~~~~~~~~~~~~~~~~~~~~~~~~~ + Once you have installed fftw the EQcorrscan install should be as simple as: .. code-block:: bash diff --git a/eqcorrscan/doc/submodules/utils.correlate.rst b/eqcorrscan/doc/submodules/utils.correlate.rst index bc8d3eb1c..28b3e9652 100644 --- a/eqcorrscan/doc/submodules/utils.correlate.rst +++ b/eqcorrscan/doc/submodules/utils.correlate.rst @@ -14,8 +14,128 @@ correlate fftw_multi_normxcorr fftw_normxcorr - multichannel_normxcorr numpy_normxcorr time_multi_normxcorr + get_array_xcorr + get_stream_xcorr + register_array_xcorr - .. comment to end block \ No newline at end of file + + .. comment to end block + +All functions within this module (and therefore all correlation functions used +in EQcorrscan) are normalised cross-correlations, which follow the definition +that Matlab uses for |normxcorr2|. + +In the time domain this is as follows: + .. math:: + c(t) = \frac{\sum_{y}(a_{y} - \overline{a})(b_{t+y} - \overline{b_t})}{\sqrt{\sum_{y}(a_{y} - \overline{a})(a_{y} - \overline{a})\sum_{y}(b_{t+y} - \overline{b_t})(b_{t+y} - \overline{b_t})}} + +where :math:`c(t)` is the normalised cross-correlation at time :math:`t`, +:math:`a` is the template window which has length :math:`y`, :math:`b` is the +continuous data, :math:`\overline{a}` is the mean of the template, and +:math:`\overline{b_t}` is the local mean of the continuous data. + +In practice the correlations only remove the mean of the template once, but we do +remove the mean from the continuous data at every time-step. Without doing this +(just removing the global mean), correlations are affected by jumps in average +amplitude, which is most noticeable during aftershock sequences, or when using +short templates.
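+
+As a worked illustration of the definition above, the sketch below implements
+it directly in plain numpy. Note that ``naive_normxcorr`` is not part of
+EQcorrscan - it is an illustrative, deliberately slow reference implementation:
+the local mean of the continuous data is recomputed for every window, which is
+exactly what makes the result insensitive to jumps in average amplitude:
+
+.. code-block:: python
+
+    import numpy as np
+
+    def naive_normxcorr(template, image):
+        # Direct transcription of the definition above; for checking only.
+        y = len(template)
+        a = template - template.mean()  # template mean is removed once
+        c = np.empty(len(image) - y + 1)
+        for t in range(len(c)):
+            # local mean of the continuous data, recomputed at every step
+            b = image[t:t + y] - image[t:t + y].mean()
+            c[t] = (a * b).sum() / np.sqrt((a * a).sum() * (b * b).sum())
+        return c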
+ +In the frequency domain (functions :func:`eqcorrscan.utils.correlate.numpy_normxcorr`, +:func:`eqcorrscan.utils.correlate.fftw_normxcorr`, :func:`fftw_multi_normxcorr`), +correlations are computed using an equivalent method. All methods are tested +against one-another to check for internal consistency, and checked against results +computed using Matlab's |normxcorr2| function. These routines also give the same +results (within 0.00001) as openCV cross-correlations. + +.. |normxcorr2| raw:: html + + normxcorr2 + +Selecting a correlation function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +EQcorrscan strives to use sensible default algorithms for calculating +correlation values, however, you may want to change how correlations are +calculated to better suit your specific needs. + + +There are currently three different correlation functions included in EQcorrscan: + + 1. :func:`eqcorrscan.utils.correlate.numpy_normxcorr` known as "numpy" + + 2. :func:`eqcorrscan.utils.correlate.time_multi_normxcorr` known as "time_domain" + + 3. :func:`eqcorrscan.utils.correlate.fftw_normxcorr` known as "fftw" + +Number 3 is the default. + +Switching which correlation function is used +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can switch between correlation functions using the `xcorr_func` parameter +included in: +- :func:`eqcorrscan.core.match_filter.match_filter`, +- :meth:`eqcorrscan.core.match_filter.Tribe.detect`, +- :meth:`eqcorrscan.core.match_filter.Template.detect` + +by: + +1. passing a string (e.g. "numpy", "time_domain", or "fftw"), or +2. passing a function + +for example: + +.. code-block:: python + + >>> import obspy + >>> import numpy as np + >>> from eqcorrscan.utils.correlate import numpy_normxcorr, set_xcorr + >>> from eqcorrscan.core.match_filter import match_filter + + + >>> # generate some toy templates and stream + >>> random = np.random.RandomState(42) + >>> template = obspy.read() + >>> stream = obspy.read() + >>> for num, tr in enumerate(stream): # iter stream and embed templates + ... data = tr.data + ... tr.data = random.randn(6000) * 5 + ... tr.data[100: 100 + len(data)] = data + + >>> # do correlation using numpy rather than fftw + >>> detections = match_filter(['1'], [template], stream, .5, 'absolute', + ... 1, False, xcorr_func='numpy') + + >>> # do correlation using a custom function + >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs): + ... # Just to keep example short call other xcorr function + ... print('calling custom xcorr function') + ... return numpy_normxcorr(templates, stream, pads, *args, **kwargs) + + >>> detections = match_filter( + ... ['1'], [template], stream, .5, 'absolute', 1, False, + ... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS + calling custom xcorr function... + + +You can also use the set_xcorr object (eqcorrscan.utils.correlate.set_xcorr) +to change which correlation function is used. This can be done permanently +or within the scope of a context manager: + +.. code-block:: python + + >>> # change the default xcorr function for all code in the with block + >>> with set_xcorr(custom_normxcorr): + ... detections = match_filter(['1'], [template], stream, .5, + ... 'absolute', 1, False) # doctest:+ELLIPSIS + calling custom xcorr function... + + >>> # permanently set the xcorr function (until the python kernel restarts) + >>> set_xcorr(custom_normxcorr) + default changed to custom_normxcorr + >>> detections = match_filter(['1'], [template], stream, .5, 'absolute', + ... 
1, False) # doctest:+ELLIPSIS + calling custom xcorr function... + >>> set_xcorr.revert() # change it back to the previous state diff --git a/eqcorrscan/doc/submodules/utils.findpeaks.rst b/eqcorrscan/doc/submodules/utils.findpeaks.rst index 84a64b216..b00695c68 100644 --- a/eqcorrscan/doc/submodules/utils.findpeaks.rst +++ b/eqcorrscan/doc/submodules/utils.findpeaks.rst @@ -14,6 +14,5 @@ findpeaks coin_trig find_peaks2_short - find_peaks_dep .. comment to end block diff --git a/eqcorrscan/doc/tutorials/clustering.rst b/eqcorrscan/doc/tutorials/clustering.rst index b6e1422ca..7ef250a73 100644 --- a/eqcorrscan/doc/tutorials/clustering.rst +++ b/eqcorrscan/doc/tutorials/clustering.rst @@ -18,30 +18,26 @@ threshold to 1,000km .. code-block:: python - from eqcorrscan.utils.clustering import space_cluster - from obspy.clients.fdsn import Client - from obspy import UTCDateTime - client = Client("IRIS") - starttime = UTCDateTime("2002-01-01") - endtime = UTCDateTime("2002-02-01") - cat = client.get_events(starttime=starttime, endtime=endtime, - minmagnitude=6, catalog="ISC") - groups = space_cluster(catalog=cat, d_thresh=1000, show=False) + >>> from eqcorrscan.utils.clustering import space_cluster + >>> from obspy.clients.fdsn import Client + >>> from obspy import UTCDateTime + + >>> client = Client("IRIS") + >>> starttime = UTCDateTime("2002-01-01") + >>> endtime = UTCDateTime("2002-02-01") + >>> cat = client.get_events(starttime=starttime, endtime=endtime, + ... minmagnitude=6, catalog="ISC") + >>> groups = space_cluster(catalog=cat, d_thresh=1000, show=False) Download a local catalog of earthquakes and cluster much finer (distance threshold of 2km). .. code-block:: python - from eqcorrscan.utils.clustering import space_cluster - from obspy.clients.fdsn import Client - from obspy import UTCDateTime - client = Client("NCEDC") - starttime = UTCDateTime("2002-01-01") - endtime = UTCDateTime("2002-02-01") - cat = client.get_events(starttime=starttime, endtime=endtime, - minmagnitude=2) - groups = space_cluster(catalog=cat, d_thresh=2, show=False) + >>> client = Client("NCEDC") + >>> cat = client.get_events(starttime=starttime, endtime=endtime, + ... minmagnitude=2) + >>> groups = space_cluster(catalog=cat, d_thresh=2, show=False) Setting show to true will plot the dendrogram for grouping with individual @@ -66,15 +62,11 @@ distance threshold and a one-day temporal limit. .. code-block:: python - from eqcorrscan.utils.clustering import space_time_cluster - from obspy.clients.fdsn import Client - from obspy import UTCDateTime - client = Client("IRIS") - starttime = UTCDateTime("2002-01-01") - endtime = UTCDateTime("2002-02-01") - cat = client.get_events(starttime=starttime, endtime=endtime, - minmagnitude=6, catalog="ISC") - groups = space_time_cluster(catalog=cat, t_thresh=86400, d_thresh=1000) + >>> from eqcorrscan.utils.clustering import space_time_cluster + >>> client = Client("IRIS") + >>> cat = client.get_events(starttime=starttime, endtime=endtime, + ... minmagnitude=6, catalog="ISC") + >>> groups = space_time_cluster(catalog=cat, t_thresh=86400, d_thresh=1000) Cluster according to cross-correlation values @@ -91,25 +83,32 @@ in the tests directory. .. code-block:: python - from obspy import read - import glob - import os - from eqcorrscan.utils.clustering import cluster - # You will need to edit this line to the location of your eqcorrscan repo. 
- testing_path = 'eqcorrscan/tests/test_data/similar_events' - stream_files = glob.glob(os.path.join(testing_path, '*')) - stream_list = [(read(stream_file), i) - for i, stream_file in enumerate(stream_files)] - for stream in stream_list: - for tr in stream[0]: - if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']: - stream[0].remove(tr) - continue - tr.detrend('simple') - tr.filter('bandpass', freqmin=5.0, freqmax=15.0) - tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) - groups = cluster(template_list=stream_list, show=False, - corr_thresh=0.3) + >>> from obspy import read + >>> import glob + >>> import os + >>> from eqcorrscan.utils.clustering import cluster + >>> # You will need to edit this line to the location of your eqcorrscan repo. + >>> testing_path = 'eqcorrscan/tests/test_data/similar_events' + >>> stream_files = glob.glob(os.path.join(testing_path, '*')) + >>> stream_list = [(read(stream_file), i) + ... for i, stream_file in enumerate(stream_files)] + >>> for stream in stream_list: + ... for tr in stream[0]: + ... if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']: + ... stream[0].remove(tr) # doctest:+ELLIPSIS + ... continue + ... tr = tr.detrend('simple') + ... tr = tr.filter('bandpass', freqmin=5.0, freqmax=15.0) + ... tr = tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) + + >>> groups = cluster(template_list=stream_list, show=False, + ... corr_thresh=0.3, cores=2) + Computing the distance matrix using 2 cores + Computing linkage + Clustering + Found 9 groups + Extracting and grouping + Stack waveforms (linear) ------------------------ @@ -122,30 +121,12 @@ The following examples use the test data in the eqcorrscan github repository. .. code-block:: python - from obspy import read - import glob - import os - from eqcorrscan.utils.clustering import cluster - from eqcorrscan.utils.stacking import linstack - # You will need to edit this line to the location of your eqcorrscan repo. - testing_path = 'eqcorrscan/tests/test_data/similar_events' - stream_files = glob.glob(os.path.join(testing_path, '*')) - stream_list = [(read(stream_file), i) - for i, stream_file in enumerate(stream_files)] - for stream in stream_list: - for tr in stream[0]: - if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']: - stream[0].remove(tr) - continue - tr.detrend('simple') - tr.filter('bandpass', freqmin=5.0, freqmax=15.0) - tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) - groups = cluster(template_list=stream_list, show=False, - corr_thresh=0.3) - # groups[0] should contain 3 streams, which we can now stack - # Groups are returned as lists of tuples, of the stream and event index - group_streams = [st_tuple[0] for st_tuple in groups[0]] - stack = linstack(streams=group_streams) + >>> from eqcorrscan.utils.stacking import linstack + + >>> # groups[0] should contain 3 streams, which we can now stack + >>> # Groups are returned as lists of tuples, of the stream and event index + >>> group_streams = [st_tuple[0] for st_tuple in groups[0]] + >>> stack = linstack(streams=group_streams) @@ -162,27 +143,10 @@ of the instantaneous phase. In this manor coherent signals are amplified. .. code-block:: python - from obspy import read - import glob - import os - from eqcorrscan.utils.clustering import cluster - from eqcorrscan.utils.stacking import PWS_stack - # You will need to edit this line to the location of your eqcorrscan repo. 
- testing_path = 'eqcorrscan/tests/test_data/similar_events' - stream_files = glob.glob(os.path.join(testing_path, '*')) - stream_list = [(read(stream_file), i) - for i, stream_file in enumerate(stream_files)] - for stream in stream_list: - for tr in stream[0]: - if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']: - stream[0].remove(tr) - continue - tr.detrend('simple') - tr.filter('bandpass', freqmin=5.0, freqmax=15.0) - tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) - groups = cluster(template_list=stream_list, show=False, - corr_thresh=0.3) - # groups[0] should contain 3 streams, which we can now stack - # Groups are returned as lists of tuples, of the stream and event index - group_streams = [st_tuple[0] for st_tuple in groups[0]] - stack = PWS_stack(streams=group_streams) \ No newline at end of file + >>> from eqcorrscan.utils.stacking import PWS_stack + + >>> # groups[0] should contain 3 streams, which we can now stack + >>> # Groups are returned as lists of tuples, of the stream and event index + >>> stack = PWS_stack(streams=group_streams) + Computing instantaneous phase + Computing the phase stack diff --git a/eqcorrscan/doc/tutorials/mag-calc.rst b/eqcorrscan/doc/tutorials/mag-calc.rst index fe4c9caeb..35567ab27 100644 --- a/eqcorrscan/doc/tutorials/mag-calc.rst +++ b/eqcorrscan/doc/tutorials/mag-calc.rst @@ -15,18 +15,19 @@ This example requires data downloaded from the eqcorrscan github repository. .. code-block:: python - from eqcorrscan.utils.mag_calc import amp_pick_sfile - from obspy.core.event import Event - import os - testing_path = 'eqcorrscan/tests/test_data' - sfile = os.path.join(testing_path, 'REA', 'TEST_', - '01-0411-15L.S201309') - datapath = os.path.join(testing_path, 'WAV', 'TEST_') - respdir = testing_path - event = amp_pick_sfile(sfile=sfile, datapath=datapath, - respdir=respdir, chans=['Z'], var_wintype=True, - winlen=0.9, pre_pick=0.2, pre_filt=True, - lowcut=1.0, highcut=20.0, corners=4) + >>> from eqcorrscan.utils.mag_calc import amp_pick_sfile + >>> from obspy.core.event import Event + >>> import os + >>> testing_path = 'eqcorrscan/tests/test_data' + >>> sfile = os.path.join(testing_path, 'REA', 'TEST_', + ... '01-0411-15L.S201309') + >>> datapath = os.path.join(testing_path, 'WAV', 'TEST_') + >>> respdir = testing_path + >>> event = amp_pick_sfile(sfile=sfile, datapath=datapath, + ... respdir=respdir, chans=['Z'], var_wintype=True, + ... winlen=0.9, pre_pick=0.2, pre_filt=True, + ... lowcut=1.0, highcut=20.0, corners=4) # doctest:+ELLIPSIS + Working on ... Relative moment by singular-value decomposition ----------------------------------------------- @@ -41,32 +42,33 @@ This example requires data downloaded from the eqcorrscan github repository. .. code-block:: python - from eqcorrscan.utils.mag_calc import SVD_moments - from obspy import read - import glob - import os - from eqcorrscan.utils.clustering import SVD - import numpy as np - # Do the set-up - testing_path = 'eqcorrscan/tests/test_data/similar_events' - stream_files = glob.glob(os.path.join(testing_path, '*')) - stream_list = [read(stream_file) for stream_file in stream_files] - event_list = [] - for i, stream in enumerate(stream_list): - st_list = [] - for tr in stream: - # Only use the vertical channels of sites with known high similarity. - # You do not need to use this step for your data. 
- if (tr.stats.station, tr.stats.channel) not in\ - [('WHAT2', 'SH1'), ('WV04', 'SHZ'), ('GCSZ', 'EHZ')]: - stream.remove(tr) - continue - tr.detrend('simple') - tr.filter('bandpass', freqmin=5.0, freqmax=15.0) - tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) - st_list.append(i) - event_list.append(st_list) - event_list = np.asarray(event_list).T.tolist() - SVectors, SValues, Uvectors, stachans = SVD(stream_list=stream_list) - M, events_out = SVD_moments(U=Uvectors, s=SValues, V=SVectors, - stachans=stachans, event_list=event_list) \ No newline at end of file + >>> from eqcorrscan.utils.mag_calc import svd_moments + >>> from obspy import read + >>> import glob + >>> from eqcorrscan.utils.clustering import svd + >>> import numpy as np + >>> # Do the set-up + >>> testing_path = 'eqcorrscan/tests/test_data/similar_events' + >>> stream_files = glob.glob(os.path.join(testing_path, '*')) + >>> stream_list = [read(stream_file) for stream_file in stream_files] + >>> event_list = [] + >>> for i, stream in enumerate(stream_list): # doctest:+ELLIPSIS + ... st_list = [] + ... for tr in stream: + ... # Only use the vertical channels of sites with known high similarity. + ... # You do not need to use this step for your data. + ... if (tr.stats.station, tr.stats.channel) not in\ + ... [('WHAT2', 'SH1'), ('WV04', 'SHZ'), ('GCSZ', 'EHZ')]: + ... stream.remove(tr) + ... continue + ... tr.detrend('simple') + ... tr.filter('bandpass', freqmin=5.0, freqmax=15.0) + ... tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45) + ... st_list.append(i) + ... event_list.append(st_list) + + >>> event_list = np.asarray(event_list).T.tolist() + >>> SVectors, SValues, Uvectors, stachans = svd(stream_list=stream_list) + >>> M, events_out = svd_moments(u=Uvectors, s=SValues, v=SVectors, + ... stachans=stachans, event_list=event_list) # doctest:+ELLIPSIS + Created Kernel matrix: ... \ No newline at end of file diff --git a/eqcorrscan/doc/tutorials/matched-filter.rst b/eqcorrscan/doc/tutorials/matched-filter.rst index d4fd1a6ea..26e32d609 100644 --- a/eqcorrscan/doc/tutorials/matched-filter.rst +++ b/eqcorrscan/doc/tutorials/matched-filter.rst @@ -64,14 +64,14 @@ event: .. code-block:: python - import glob - from eqcorrscan.core.match_filter import Template - sac_files = glob.glob('eqcorrscan/tests/test_data/SAC/2014p611252') - # sac_files is now a list of all the SAC files for event id:2014p611252 - template = Template().construct( - method='from_sac', name='test', lowcut=2.0, highcut=8.0, - samp_rate=20.0, filt_order=4, prepick=0.1, swin='all', - length=2.0, sac_files=sac_files) + >>> import glob + >>> from eqcorrscan.core.match_filter import Template + >>> sac_files = glob.glob('eqcorrscan/tests/test_data/SAC/2014p611252/*') + >>> # sac_files is now a list of all the SAC files for event id:2014p611252 + >>> template = Template().construct( + ... method='from_sac', name='test', lowcut=2.0, highcut=8.0, + ... samp_rate=20.0, filt_order=4, prepick=0.1, swin='all', + ... length=2.0, sac_files=sac_files) Tribe creation @@ -84,19 +84,20 @@ to their start-time, but you can rename them later if you wish: .. code-block:: python - from eqcorrscan.core.match_filter import Tribe - from obspy.clients.fdsn import Client - - client = Client('NCEDC') - catalog = client.get_events(eventid='72572665', includearrivals=True) - # To speed the example we have a catalog of one event, but you can have - # more, we are also only using the first five picks, again to speed the - # example. 
- catalog[0].picks = catalog[0].picks[0:5] - tribe = Tribe().construct( - method='from_client', catalog=catalog, client_id='NCEDC', lowcut=2.0, - highcut=8.0, samp_rate=20.0, filt_order=4, length=6.0, prepick=0.1, - swin='all', process_len=3600, all_horiz=True) + >>> from eqcorrscan.core.match_filter import Tribe + >>> from obspy.clients.fdsn import Client + + >>> client = Client('NCEDC') + >>> catalog = client.get_events(eventid='72572665', includearrivals=True) + >>> # To speed the example we have a catalog of one event, but you can have + >>> # more, we are also only using the first five picks, again to speed the + >>> # example. + >>> catalog[0].picks = catalog[0].picks[0:5] + >>> tribe = Tribe().construct( + ... method='from_client', catalog=catalog, client_id='NCEDC', lowcut=2.0, + ... highcut=8.0, samp_rate=20.0, filt_order=4, length=6.0, prepick=0.1, + ... swin='all', process_len=3600, all_horiz=True) + Pre-processing data Matched-filter detection using a Tribe -------------------------------------- @@ -115,12 +116,12 @@ data by running the following: .. code-block:: python - from obspy import UTCDateTime + >>> from obspy import UTCDateTime - party, stream = tribe.client_detect( - client=client, starttime=UTCDateTime(2016, 1, 2), - endtime=UTCDateTime(2016, 1, 3), threshold=8, threshold_type='MAD', - trig_int=6, plotvar=False, return_stream=True) + >>> party, stream = tribe.client_detect( + ... client=client, starttime=UTCDateTime(2016, 1, 2), + ... endtime=UTCDateTime(2016, 1, 3), threshold=8, threshold_type='MAD', + ... trig_int=6, plotvar=False, return_stream=True) Lag-calc using a Party ---------------------- @@ -130,8 +131,10 @@ generate re-picked catalogues using lag-calc: .. code-block:: python - repicked_catalog = party.lag_calc( - stream, pre_processed=False, shift_len=0.2, min_cc=0.4) + >>> stream = stream.merge().sort(['station']) + >>> repicked_catalog = party.lag_calc(stream, pre_processed=False, + ... shift_len=0.2, min_cc=0.4) # doctest:+ELLIPSIS + 5 Trace(s) in Stream:... By using the above examples you can go from a standard catalog available from data centers, to a matched-filter detected and cross-correlation repicked diff --git a/eqcorrscan/doc/tutorials/subspace.rst b/eqcorrscan/doc/tutorials/subspace.rst index 01d41ba9a..c0130643b 100644 --- a/eqcorrscan/doc/tutorials/subspace.rst +++ b/eqcorrscan/doc/tutorials/subspace.rst @@ -44,8 +44,8 @@ To begin with you will need to create a **Detector**: .. code-block:: python - from eqcorrscan.core import subspace - detector = subspace.Detector() + >>> from eqcorrscan.core import subspace + >>> detector = subspace.Detector() This will create an empty *detector* object. These objects have various attributes, including the data to be used as a detector (*detector.data*), alongside the full @@ -62,9 +62,19 @@ aligned (see clustering submodule for alignment methods). .. code-block:: python - detector.construct(streams=streams, lowcut=2, highcut=9, filt_order=4, - sampling_rate=20, multiplex=True, name='Test_1', - align=True, shift_len=0.5) + >>> from obspy import read + >>> import glob + >>> wavefiles = glob.glob('eqcorrscan/tests/test_data/similar_events/*') + >>> streams = [read(w) for w in wavefiles[0:3]] + >>> # Channels must all be the same length + >>> for st in streams: + ... for tr in st: + ... tr.data = tr.data[int(41.5 * tr.stats.sampling_rate): + ... int(44 * tr.stats.sampling_rate)] + >>> detector.construct(streams=streams, lowcut=2, highcut=9, filt_order=4, + ... 
sampling_rate=20, multiplex=True, name='Test_1', + ... align=True, shift_len=0.5, reject=0.2) + Detector: Test_1 This will populate all the attributes of your *detector* object, and fill the *detector.data* with the full input basis vector matrix. @@ -76,7 +86,8 @@ EQcorrscan simply use the *partition* method: .. code-block:: python - detector.partition(4) + >>> detector.partition(2) + Detector: Test_1 This will populate *detector.data* with the first four, left-most input basis vectors. You can test to see how much of your original design set is @@ -84,7 +95,7 @@ described by this detector by using the *energy_capture* method: .. code-block:: python - percent_capture = detector.energy_capture() + >>> percent_capture = detector.energy_capture() This will return a percentage capture, you can run this for multiple dimensions to test what dimension best suits your application. Again, details for this @@ -99,7 +110,12 @@ True. .. code-block:: python - detections = detector.detect(st=stream, threshold=0.5, trig_int=3) + >>> stream = read(wavefiles[0]) + >>> for tr in stream: + ... tr.data = tr.data[int(41.5 * tr.stats.sampling_rate): + ... int(44 * tr.stats.sampling_rate)] + >>> detections = detector.detect(st=stream, threshold=0.5, trig_int=3) # doctest:+ELLIPSIS + Detection took ... Advanced Example diff --git a/eqcorrscan/doc/tutorials/template-creation.rst b/eqcorrscan/doc/tutorials/template-creation.rst index 4f6c4a554..efc709291 100644 --- a/eqcorrscan/doc/tutorials/template-creation.rst +++ b/eqcorrscan/doc/tutorials/template-creation.rst @@ -1,6 +1,10 @@ Template creation ================= +Note: These tutorials are for the functional workflow - for details on creating +Template and Tribe objects see the :doc:`matched filter ` docs +page. + Simple example -------------- @@ -18,15 +22,16 @@ FDSN (see |obspy_fdsn| for a list of possible clients): .. code-block:: python - from obspy.clients.fdsn import Client - from obspy.core.event import Catalog - from eqcorrscan.core.template_gen import from_client - client = Client('NCEDC') - catalog = client.get_events(eventid='72572665', includearrivals=True) - templates = from_client(catalog=catalog, client_id='NCEDC', - lowcut=2.0, highcut=9.0, samp_rate=20.0, - filt_order=4, length=3.0, prepick=0.15, - swin='all', process_len=200) + >>> from obspy.clients.fdsn import Client + >>> from obspy.core.event import Catalog + >>> from eqcorrscan.core.template_gen import from_client + >>> client = Client('NCEDC') + >>> catalog = client.get_events(eventid='72572665', includearrivals=True) + >>> templates = from_client(catalog=catalog, client_id='NCEDC', + ... lowcut=2.0, highcut=9.0, samp_rate=20.0, + ... filt_order=4, length=3.0, prepick=0.15, + ... swin='all', process_len=200) + Pre-processing data This will download data for a single event (given by eventid) from the NCEDC database, then use that information to download relevant waveform data. These @@ -77,7 +82,7 @@ which is useful for template storage. However we do not constrain you to this. .. 
code-block:: python - template.write('template.ms', format="MSEED") + >>> templates[0].write('template.ms', format="MSEED") Advanced example diff --git a/eqcorrscan/lib/libutils.def b/eqcorrscan/lib/libutils.def index d7c03aa58..3efe031e2 100644 --- a/eqcorrscan/lib/libutils.def +++ b/eqcorrscan/lib/libutils.def @@ -3,5 +3,7 @@ EXPORTS normxcorr_fftw normxcorr_fftw_threaded normxcorr_time + normxcorr_time_threaded multi_normxcorr_fftw multi_normxcorr_time + multi_normxcorr_time_threaded diff --git a/eqcorrscan/lib/multi_corr.c b/eqcorrscan/lib/multi_corr.c index 1d8a2faa9..d36d3f1b9 100644 --- a/eqcorrscan/lib/multi_corr.c +++ b/eqcorrscan/lib/multi_corr.c @@ -20,7 +20,18 @@ #include #include +#include #include +#if (defined(_MSC_VER)) + #if (_MSC_VER < 1800) + #include + #define isnanf(x) _isnanf(x) + #endif + #define inline __inline +#endif +#if (defined(__APPLE__) && !isnanf) + #define isnanf isnan +#endif #include #if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) #include @@ -30,19 +41,23 @@ #endif // Prototypes -int normxcorr_fftw(float*, int, int, float*, int, float*, int); +int normxcorr_fftw(float*, int, int, float*, int, float*, int, int*, int*); + +static inline int set_ncc(int t, int i, int template_len, int image_len, float value, int *used_chans, int *pad_array, float *ncc); -int normxcorr_fftw_threaded(float*, int, int, float*, int, float*, int); +int normxcorr_fftw_main(float*, int, int, float*, int, float*, int, double*, double*, double*, + fftw_complex*, fftw_complex*, fftw_complex*, fftw_plan, fftw_plan, fftw_plan, int*, int*); -int normxcorr_time(float*, int, float*, int, float*); +int normxcorr_fftw_threaded(float*, int, int, float*, int, float*, int, int*, int*); -int multi_normxcorr_fftw(float*, int, int, int, float*, int, float*, int); +void free_fftw_arrays(int, double**, double**, double**, fftw_complex**, fftw_complex**, fftw_complex**); -int multi_normxcorr_time(float*, int, int, float*, int, float*); +int multi_normxcorr_fftw(float*, int, int, int, float*, int, float*, int, int*, int*); // Functions int normxcorr_fftw_threaded(float *templates, int template_len, int n_templates, - float *image, int image_len, float *ncc, int fft_len){ + float *image, int image_len, float *ncc, int fft_len, + int *used_chans, int *pad_array) { /* Purpose: compute frequency domain normalised cross-correlation of real data using fftw Author: Calum J. 
Chamberlain @@ -58,8 +73,8 @@ int normxcorr_fftw_threaded(float *templates, int template_len, int n_templates, fft_len: Size for fft (n1) */ int N2 = fft_len / 2 + 1; - int i, t, startind; - double mean, stdev, old_mean, new_samp, old_samp, c, var=0.0, sum=0.0, acceptedDiff = 0.0000001; + int i, t, startind, status = 0; + double mean, stdev, old_mean, new_samp, old_samp, var=0.0, sum=0.0, acceptedDiff = 0.0000001; double * norm_sums = (double *) calloc(n_templates, sizeof(double)); double * template_ext = (double *) calloc(fft_len * n_templates, sizeof(double)); double * image_ext = (double *) calloc(fft_len, sizeof(double)); @@ -121,15 +136,12 @@ int normxcorr_fftw_threaded(float *templates, int template_len, int n_templates, stdev = sqrt(var); // Used for centering - taking only the valid part of the cross-correlation startind = template_len - 1; - for (t = 0; t < n_templates; ++t){ - if (var < acceptedDiff){ - ncc[t * (image_len - template_len + 1)] = 0; - } - else { - c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean) / stdev; - ncc[t * (image_len - template_len + 1)] = (float) c; - } - } + if (var >= acceptedDiff) { + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean) / stdev; + status += set_ncc(t, 0, template_len, image_len, (float) c, used_chans, pad_array, ncc); + } + } // Center and divide by length to generate scaled convolution for(i = 1; i < (image_len - template_len + 1); ++i){ // Need to cast to double otherwise we end up with annoying floating @@ -140,13 +152,10 @@ int normxcorr_fftw_threaded(float *templates, int template_len, int n_templates, mean = mean + (new_samp - old_samp) / template_len; var += (new_samp - old_samp) * (new_samp - mean + old_samp - old_mean) / (template_len); stdev = sqrt(var); - for (t=0; t < n_templates; ++t){ - if (var > acceptedDiff){ - c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean ) / stdev; - ncc[(t * (image_len - template_len + 1)) + i] = (float) c; - } - else{ - ncc[(t * (image_len - template_len + 1)) + i] = 0.0; + if (var > acceptedDiff) { // TODO: above is '>=', should they be the same? + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean ) / stdev; + status += set_ncc(t, i, template_len, image_len, (float) c, used_chans, pad_array, ncc); } } } @@ -166,12 +175,13 @@ int normxcorr_fftw_threaded(float *templates, int template_len, int n_templates, free(template_ext); free(image_ext); - return 0; + return status; } int normxcorr_fftw(float *templates, int template_len, int n_templates, - float *image, int image_len, float *ncc, int fft_len){ + float *image, int image_len, float *ncc, int fft_len, + int *used_chans, int *pad_array){ /* Purpose: compute frequency domain normalised cross-correlation of real data using fftw Author: Calum J. Chamberlain @@ -185,23 +195,89 @@ int normxcorr_fftw(float *templates, int template_len, int n_templates, ncc: Output for cross-correlation - should be pointer to memory - must be n_templates x image_len - template_len + 1 fft_len: Size for fft (n1) + Notes: + This is a wrapper around `normxcorr_fftw_main`, allocating required memory and plans + for that function. We have taken this outside the main function because creating plans + is not thread-safe and we want to call the main function from within an OpenMP loop. 
*/ + int status = 0; int N2 = fft_len / 2 + 1; - int i, t, startind; - double mean, stdev, old_mean, new_samp, old_samp, c, var=0.0, sum=0.0, acceptedDiff = 0.0000001; - double * norm_sums = (double *) calloc(n_templates, sizeof(double)); - double * template_ext = (double *) calloc(fft_len * n_templates, sizeof(double)); - double * image_ext = (double *) calloc(fft_len, sizeof(double)); - double * ccc = (double *) fftw_malloc(sizeof(double) * fft_len * n_templates); - fftw_complex * outa = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * N2 * n_templates); - fftw_complex * outb = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * N2); - fftw_complex * out = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * N2 * n_templates); + // All memory allocated with `fftw_malloc` to ensure 16-byte aligned + double * template_ext = fftw_alloc_real(fft_len * n_templates); + double * image_ext = fftw_alloc_real(fft_len); + double * ccc = fftw_alloc_real(fft_len * n_templates); + fftw_complex * outa = fftw_alloc_complex(N2 * n_templates); + fftw_complex * outb = fftw_alloc_complex(N2); + fftw_complex * out = fftw_alloc_complex(N2 * n_templates); // Plan - fftw_plan pa = fftw_plan_dft_r2c_2d(n_templates, fft_len, template_ext, outa, FFTW_ESTIMATE); fftw_plan pb = fftw_plan_dft_r2c_1d(fft_len, image_ext, outb, FFTW_ESTIMATE); fftw_plan px = fftw_plan_dft_c2r_2d(n_templates, fft_len, out, ccc, FFTW_ESTIMATE); + // Initialise to zero + memset(template_ext, 0, fft_len * n_templates * sizeof(double)); + memset(image_ext, 0, fft_len * sizeof(double)); + + // Call the function to do the work + status = normxcorr_fftw_main(templates, template_len, n_templates, image, image_len, + ncc, fft_len, template_ext, image_ext, ccc, outa, outb, out, pa, pb, px, + used_chans, pad_array); + + // free memory and plans + fftw_destroy_plan(pa); + fftw_destroy_plan(pb); + fftw_destroy_plan(px); + + fftw_free(out); + fftw_free(outa); + fftw_free(outb); + fftw_free(ccc); + fftw_free(template_ext); + fftw_free(image_ext); + + fftw_cleanup(); + fftw_cleanup_threads(); + + return status; +} + + +int normxcorr_fftw_main(float *templates, int template_len, int n_templates, + float *image, int image_len, float *ncc, int fft_len, + double *template_ext, double *image_ext, double *ccc, + fftw_complex *outa, fftw_complex *outb, fftw_complex *out, + fftw_plan pa, fftw_plan pb, fftw_plan px, int *used_chans, + int *pad_array) { + /* + Purpose: compute frequency domain normalised cross-correlation of real data using fftw + Author: Calum J. 
Chamberlain + Date: 12/06/2017 + Args: + templates: Template signals + template_len: Length of template + n_templates: Number of templates (n0) + image: Image signal (to scan through) + image_len: Length of image + ncc: Output for cross-correlation - should be pointer to memory - + must be n_templates x image_len - template_len + 1 + It is assumed that ncc will be initialised to zero before + passing into this function + fft_len: Size for fft (n1) + template_ext: Input FFTW array for template transform (must be allocated) + image_ext: Input FFTW array for image transform (must be allocated) + ccc: Output FFTW array for reverse transform (must be allocated) + outa: Output FFTW array for template transform (must be allocated) + outb: Output FFTW array for image transform (must be allocated) + out: Input array for reverse transform (must be allocated) + pa: Forward plan for templates + pb: Forward plan for image + px: Reverse plan + */ + int N2 = fft_len / 2 + 1; + int i, t, startind, status = 0; + double mean, stdev, old_mean, new_samp, old_samp, var=0.0, sum=0.0, acceptedDiff = 0.0000001; + double * norm_sums = (double *) calloc(n_templates, sizeof(double)); + // zero padding - and flip template for (t = 0; t < n_templates; ++t){ for (i = 0; i < template_len; ++i) @@ -215,8 +291,8 @@ int normxcorr_fftw(float *templates, int template_len, int n_templates, image_ext[i] = (double) image[i]; } // Compute ffts of template and image - fftw_execute(pa); - fftw_execute(pb); + fftw_execute_dft_r2c(pa, template_ext, outa); + fftw_execute_dft_r2c(pb, image_ext, outb); // Compute dot product for (t = 0; t < n_templates; ++t){ @@ -227,7 +303,8 @@ } } // Compute inverse fft - fftw_execute(px); + fftw_execute_dft_c2r(px, out, ccc); + // Procedures for normalisation // Compute starting mean, will update this for (i=0; i < template_len; ++i){ @@ -240,17 +317,17 @@ var += pow(image[i] - mean, 2) / (template_len); } stdev = sqrt(var); + + // Used for centering - taking only the valid part of the cross-correlation startind = template_len - 1; - for (t = 0; t < n_templates; ++t){ - if (var < acceptedDiff){ - ncc[t * (image_len - template_len + 1)] = 0; - } - else { - c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean) / stdev; - ncc[t * (image_len - template_len + 1)] = (float) c; - } - } + if (var >= acceptedDiff) { + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean) / stdev; + status += set_ncc(t, 0, template_len, image_len, (float) c, used_chans, pad_array, ncc); + } + } + // Center and divide by length to generate scaled convolution for(i = 1; i < (image_len - template_len + 1); ++i){ // Need to cast to double otherwise we end up with annoying floating @@ -261,81 +338,223 @@ mean = mean + (new_samp - old_samp) / template_len; var += (new_samp - old_samp) * (new_samp - mean + old_samp - old_mean) / (template_len); stdev = sqrt(var); - for (t=0; t < n_templates; ++t){ - if (var > acceptedDiff){ - c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean ) / stdev; - ncc[(t * (image_len - template_len + 1)) + i] = (float) c; - } - else{ - ncc[(t * (image_len - template_len + 1)) + i] = 0.0; + if (var > acceptedDiff) { // TODO: above is '>=', should they be
the same? + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean ) / stdev; + status += set_ncc(t, i, template_len, image_len, (float) c, used_chans, pad_array, ncc); } } } // Clean up - fftw_destroy_plan(pa); - fftw_destroy_plan(pb); - fftw_destroy_plan(px); + free(norm_sums); - fftw_free(out); - fftw_free(outa); - fftw_free(outb); - fftw_free(ccc); + return status; +} - fftw_cleanup(); - fftw_cleanup_threads(); - free(template_ext); free(image_ext); free(norm_sums); - return 0; -} +static inline int set_ncc(int t, int i, int template_len, int image_len, float value, int *used_chans, int *pad_array, float *ncc) { + int status = 0; + if (used_chans[t] && (i >= pad_array[t])) { + size_t ncc_index = t * (image_len - template_len + 1) + i - pad_array[t]; -int normxcorr_time(float *template, int template_len, float *image, int image_len, float *ccc){ - // Time domain cross-correlation - requires zero-mean template and image - int p, k; - int steps = image_len - template_len + 1; - double numerator = 0.0, denom, mean = 0.0; - double auto_a = 0.0, auto_b = 0.0; + if (isnanf(value)) { + // set NaNs to zero + value = 0.0; + } + else if (fabsf(value) > 1.01) { + // this will raise an exception when we return to Python + status = 1; + } + else if (value > 1.0) { + value = 1.0; + } + else if (value < -1.0) { + value = -1.0; + } - for (k=0; k < template_len; ++k){ - mean += image[k]; - } - mean = mean / template_len; + #pragma omp atomic + ncc[ncc_index] += value; + } - for(p = 0; p < template_len; ++p){ - auto_a += (double) template[p] * (double) template[p]; - numerator += (double) template[p] * ((double) image[p] - mean); - auto_b += ((double) image[p] - mean) * ((double) image[p] - mean); - } - denom = sqrt(auto_a * auto_b); - ccc[0] = (float) (numerator / denom); - for(k = 1; k < steps; ++k){ - mean = mean + (image[k + template_len - 1] - image[k - 1]) / template_len; - numerator = 0.0; - auto_b = 0.0; - for(p = 0; p < template_len; ++p){ - numerator += (double) template[p] * ((double) image[p + k] - mean); - auto_b += ((double) image[p + k] - mean) * ((double) image[p + k] - mean); - } - denom = sqrt(auto_a * auto_b); - ccc[k] = (float) (numerator / denom); - } - return 0; + return status; } -int multi_normxcorr_fftw(float *templates, int n_templates, int template_len, int n_channels, float *image, int image_len, float *ccc, int fft_len){ - int i, r=0; + +void free_fftw_arrays(int size, double **template_ext, double **image_ext, double **ccc, + fftw_complex **outa, fftw_complex **outb, fftw_complex **out) { + int i; + + /* free memory */ + for (i = 0; i < size; i++) { + fftw_free(template_ext[i]); + fftw_free(image_ext[i]); + fftw_free(ccc[i]); + fftw_free(outa[i]); + fftw_free(outb[i]); + fftw_free(out[i]); + } + free(template_ext); + free(image_ext); + free(ccc); + free(outa); + free(outb); + free(out); +} + + +int multi_normxcorr_fftw(float *templates, int n_templates, int template_len, int n_channels, + float *image, int image_len, float *ncc, int fft_len, int *used_chans, int *pad_array){ + int i; + int r = 0; + size_t N2 = (size_t) fft_len / 2 + 1; + double **template_ext = NULL; + double **image_ext = NULL; + double **ccc = NULL; + fftw_complex **outa = NULL; + fftw_complex **outb = NULL; + fftw_complex **out = NULL; + fftw_plan pa, pb, px; + int num_threads = 1; + + + #ifdef N_THREADS + /* set the number of threads - the minimum of the numbers of channels and threads */ + num_threads = (N_THREADS > 
n_channels) ? n_channels : N_THREADS; + #endif + + /* allocate memory for all threads here */ + template_ext = (double**) malloc(num_threads * sizeof(double*)); + if (template_ext == NULL) { + printf("Error allocating template_ext\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + image_ext = (double**) malloc(num_threads * sizeof(double*)); + if (image_ext == NULL) { + printf("Error allocating image_ext\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + ccc = (double**) malloc(num_threads * sizeof(double*)); + if (ccc == NULL) { + printf("Error allocating ccc\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + outa = (fftw_complex**) malloc(num_threads * sizeof(fftw_complex*)); + if (outa == NULL) { + printf("Error allocating outa\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + outb = (fftw_complex**) malloc(num_threads * sizeof(fftw_complex*)); + if (outb == NULL) { + printf("Error allocating outb\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + out = (fftw_complex**) malloc(num_threads * sizeof(fftw_complex*)); + if (out == NULL) { + printf("Error allocating out\n"); + free_fftw_arrays(0, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + // All memory allocated with `fftw_malloc` to ensure 16-byte aligned. + for (i = 0; i < num_threads; i++) { + /* initialise all to NULL so that freeing on error works */ + template_ext[i] = NULL; + image_ext[i] = NULL; + ccc[i] = NULL; + outa[i] = NULL; + outb[i] = NULL; + out[i] = NULL; + + /* allocate template_ext arrays */ + template_ext[i] = fftw_alloc_real((size_t) fft_len * n_templates); + if (template_ext[i] == NULL) { + printf("Error allocating template_ext[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + /* allocate image_ext arrays */ + image_ext[i] = fftw_alloc_real(fft_len); + if (image_ext[i] == NULL) { + printf("Error allocating image_ext[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + /* allocate ccc arrays */ + ccc[i] = fftw_alloc_real((size_t) fft_len * n_templates); + if (ccc[i] == NULL) { + printf("Error allocating ccc[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + /* allocate outa arrays */ + outa[i] = fftw_alloc_complex((size_t) N2 * n_templates); + if (outa[i] == NULL) { + printf("Error allocating outa[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + /* allocate outb arrays */ + outb[i] = fftw_alloc_complex((size_t) N2); + if (outb[i] == NULL) { + printf("Error allocating outb[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + + /* allocate out arrays */ + out[i] = fftw_alloc_complex((size_t) N2 * n_templates); + if (out[i] == NULL) { + printf("Error allocating out[%d]\n", i); + free_fftw_arrays(i + 1, template_ext, image_ext, ccc, outa, outb, out); + return -1; + } + } + + //TODO: touch all arrays - NUMA first touch??? + + // We create the plans here since they are not thread safe. 
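+    // Executing a plan, by contrast, is thread-safe as long as each thread
+    // supplies its own arrays: the plans made below against thread 0's buffers
+    // are re-run on every thread's private workspace in normxcorr_fftw_main
+    // via the new-array execute functions (fftw_execute_dft_r2c /
+    // fftw_execute_dft_c2r). The new arrays must share the layout and
+    // alignment of the arrays planned with, which fftw_alloc_real and
+    // fftw_alloc_complex guarantee here.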
+ pa = fftw_plan_dft_r2c_2d(n_templates, fft_len, template_ext[0], outa[0], FFTW_ESTIMATE); + pb = fftw_plan_dft_r2c_1d(fft_len, image_ext[0], outb[0], FFTW_ESTIMATE); + px = fftw_plan_dft_c2r_2d(n_templates, fft_len, out[0], ccc[0], FFTW_ESTIMATE); + + /* loop over the channels */ + #pragma omp parallel for reduction(+:r) num_threads(num_threads) for (i = 0; i < n_channels; ++i){ - r = normxcorr_fftw_threaded(&templates[n_templates * template_len * i], template_len, - n_templates, &image[image_len * i], image_len, - &ccc[(image_len - template_len + 1) * n_templates * i], fft_len); + int tid = 0; /* each thread has its own workspace */ + + #ifdef N_THREADS + /* get the id of this thread */ + tid = omp_get_thread_num(); + #endif + + /* initialise memory to zero */ + memset(template_ext[tid], 0, (size_t) fft_len * n_templates * sizeof(double)); + memset(image_ext[tid], 0, (size_t) fft_len * sizeof(double)); + + /* call the routine */ + r += normxcorr_fftw_main(&templates[(size_t) n_templates * template_len * i], template_len, + n_templates, &image[(size_t) image_len * i], image_len, ncc, fft_len, + template_ext[tid], image_ext[tid], ccc[tid], outa[tid], outb[tid], out[tid], + pa, pb, px, &used_chans[(size_t) i * n_templates], + &pad_array[(size_t) i * n_templates]); } + + /* free fftw memory */ + free_fftw_arrays(num_threads, template_ext, image_ext, ccc, outa, outb, out); + fftw_destroy_plan(pa); + fftw_destroy_plan(pb); + fftw_destroy_plan(px); + fftw_cleanup(); + return r; } - -int multi_normxcorr_time(float *templates, int template_len, int n_templates, float *image, int image_len, float *ccc){ - int i; - for (i = 0; i < n_templates; ++i){ - normxcorr_time(&templates[template_len * i], template_len, image, image_len, &ccc[(image_len - template_len + 1) * i]); - } - return 0; -} \ No newline at end of file diff --git a/eqcorrscan/lib/time_corr.c b/eqcorrscan/lib/time_corr.c new file mode 100644 index 000000000..ad15adb17 --- /dev/null +++ b/eqcorrscan/lib/time_corr.c @@ -0,0 +1,120 @@ +/* + * ===================================================================================== + * + * Filename: time_corr.c + * + * Purpose: Routines for computing cross-correlations in the time domain + * + * Created: 03/07/17 02:25:07 + * Revision: none + * Compiler: gcc + * + * Author: Calum Chamberlain, + * Organization: EQcorrscan + * Copyright: EQcorrscan developers.
+ *        License:  GNU Lesser General Public License, Version 3
+ *                  (https://www.gnu.org/copyleft/lesser.html)
+ *
+ * =====================================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
+    #include <omp.h>
+    #ifndef N_THREADS
+        #define N_THREADS omp_get_max_threads()
+    #endif
+#endif
+
+int normxcorr_time_threaded(float*, int, float*, int, float*, int);
+
+int normxcorr_time(float*, int, float*, int, float*);
+
+int multi_normxcorr_time(float*, int, int, float*, int, float*);
+
+int multi_normxcorr_time_threaded(float*, int, int, float*, int, float*, int);
+
+int normxcorr_time_threaded(float *template, int template_len, float *image, int image_len, float *ccc, int num_threads){
+    // Time domain cross-correlation - requires zero-mean template
+    int p, k;
+    int steps = image_len - template_len + 1;
+    double * mean = (double *) calloc(steps, sizeof(double));
+    double numerator = 0.0, denom;
+    double auto_a = 0.0, auto_b = 0.0;
+
+    mean[0] = 0;
+    for (k=0; k < template_len; ++k){
+        mean[0] += image[k];
+    }
+    mean[0] = mean[0] / template_len;
+
+    for(k = 1; k < steps; ++k){
+        mean[k] = mean[k - 1] + (image[k + template_len - 1] - image[k - 1]) / template_len;
+    }
+    for(p = 0; p < template_len; ++p){
+        auto_a += (double) template[p] * (double) template[p];
+    }
+    #pragma omp parallel for private(numerator, denom, auto_b, p) num_threads(num_threads)
+    for(k = 0; k < steps; ++k){
+        numerator = 0.0;
+        auto_b = 0.0;
+        for(p = 0; p < template_len; ++p){
+            numerator += (double) template[p] * ((double) image[p + k] - mean[k]);
+            auto_b += ((double) image[p + k] - mean[k]) * ((double) image[p + k] - mean[k]);
+        }
+        denom = sqrt(auto_a * auto_b);
+        ccc[k] = (float) (numerator / denom);
+    }
+    free(mean);  /* release the rolling-mean buffer */
+    return 0;
+}
+
+int normxcorr_time(float *template, int template_len, float *image, int image_len, float *ccc){
+    // Time domain cross-correlation - requires zero-mean template
+    int p, k;
+    int steps = image_len - template_len + 1;
+    double * mean = (double *) calloc(steps, sizeof(double));
+    double numerator = 0.0, denom;
+    double auto_a = 0.0, auto_b = 0.0;
+
+    mean[0] = 0;
+    for (k=0; k < template_len; ++k){
+        mean[0] += image[k];
+    }
+    mean[0] = mean[0] / template_len;
+
+    for(k = 1; k < steps; ++k){
+        mean[k] = mean[k - 1] + (image[k + template_len - 1] - image[k - 1]) / template_len;
+    }
+    for(p = 0; p < template_len; ++p){
+        auto_a += (double) template[p] * (double) template[p];
+    }
+    for(k = 0; k < steps; ++k){
+        numerator = 0.0;
+        auto_b = 0.0;
+        for(p = 0; p < template_len; ++p){
+            numerator += (double) template[p] * ((double) image[p + k] - mean[k]);
+            auto_b += ((double) image[p + k] - mean[k]) * ((double) image[p + k] - mean[k]);
+        }
+        denom = sqrt(auto_a * auto_b);
+        ccc[k] = (float) (numerator / denom);
+    }
+    free(mean);  /* release the rolling-mean buffer */
+    return 0;
+}
+
+int multi_normxcorr_time(float *templates, int template_len, int n_templates, float *image, int image_len, float *ccc){
+    int i;
+    for (i = 0; i < n_templates; ++i){
+        normxcorr_time(&templates[template_len * i], template_len, image, image_len, &ccc[(image_len - template_len + 1) * i]);
+    }
+    return 0;
+}
+
+int multi_normxcorr_time_threaded(float *templates, int template_len, int n_templates, float *image, int image_len, float *ccc, int num_threads){
+    int i;
+    for (i = 0; i < n_templates; ++i){
+        normxcorr_time_threaded(&templates[template_len * i], template_len, image, image_len, &ccc[(image_len - 
template_len + 1) * i], num_threads); + } + return 0; +} \ No newline at end of file diff --git a/eqcorrscan/tests/catalog_to_dd_test.py b/eqcorrscan/tests/catalog_to_dd_test.py index 1c2a79b58..4d9670a32 100644 --- a/eqcorrscan/tests/catalog_to_dd_test.py +++ b/eqcorrscan/tests/catalog_to_dd_test.py @@ -268,16 +268,6 @@ def setUpClass(cls): max_sep=cls.maximum_separation, min_link=cls.minimum_links) - # @classmethod - # def tearDownClass(cls): - # os.remove('phase.dat') - # os.remove('dt.ct') - # if os.path.isfile('dt.ct2'): - # os.remove('dt.ct2') - # os.remove('dt.cc') - # if os.path.isfile('dt.cc2'): - # os.remove('dt.cc2') - def test_write_catalog(self): """ Simple testing function for the write_catalogue function in \ diff --git a/eqcorrscan/tests/catalog_utils_test.py b/eqcorrscan/tests/catalog_utils_test.py index e6824a1f6..491b7788a 100644 --- a/eqcorrscan/tests/catalog_utils_test.py +++ b/eqcorrscan/tests/catalog_utils_test.py @@ -8,13 +8,16 @@ from __future__ import unicode_literals import unittest +import pytest from obspy.clients.fdsn import Client from obspy import UTCDateTime from eqcorrscan.utils.catalog_utils import filter_picks +@pytest.mark.network class CatalogUtilsTests(unittest.TestCase): + @pytest.mark.flaky(reruns=2) # Rerun the test in case of network timeout def test_filter_picks(self): """ Test various methods of filtering picks in a catalog.""" client = Client(str("NCEDC")) diff --git a/eqcorrscan/tests/clustering_test.py b/eqcorrscan/tests/clustering_test.py index 6d8304c8a..59b334077 100644 --- a/eqcorrscan/tests/clustering_test.py +++ b/eqcorrscan/tests/clustering_test.py @@ -7,6 +7,7 @@ from __future__ import unicode_literals import unittest +import pytest import os import glob import warnings @@ -25,9 +26,11 @@ from eqcorrscan.utils.clustering import space_cluster, space_time_cluster +@pytest.mark.network class ClusteringTests(unittest.TestCase): """Main testing routines""" @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): cls.st1 = read() cls.st2 = cls.st1.copy() @@ -157,9 +160,11 @@ def test_space_time_cluster(self): len(self.cat)) +@pytest.mark.network class ClusteringTestWarnings(unittest.TestCase): """Main testing routines""" @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): cls.st1 = read() cls.st2 = cls.st1.copy() diff --git a/eqcorrscan/tests/correlate_test.py b/eqcorrscan/tests/correlate_test.py index 6a4607f15..267517545 100644 --- a/eqcorrscan/tests/correlate_test.py +++ b/eqcorrscan/tests/correlate_test.py @@ -2,112 +2,415 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -import unittest -import numpy as np + +import copy +import itertools import time +from collections import defaultdict +from functools import wraps +import numpy as np +import pytest from obspy import Trace, Stream -from eqcorrscan.utils.correlate import numpy_normxcorr, fftw_normxcorr, \ - time_multi_normxcorr, multichannel_normxcorr - - -class CorrelateTests(unittest.TestCase): - def test_same_various_methods(self): - templates = np.random.randn(200, 200) - stream = np.random.randn(10000) - stream *= stream ** 5 - pads = np.zeros(templates.shape[0], dtype=int) - tic = time.time() - numpy_ccc, no_chans = numpy_normxcorr(templates, stream, pads) - toc = time.time() - print('Scipy took: %f seconds' % (toc-tic)) - tic = time.time() - fftw_ccc, no_chans = fftw_normxcorr( - templates, stream, pads, threaded=False) - toc = time.time() - print('FFTW took: %f seconds' % (toc-tic)) - tic = 
time.time()
-        fftw_ccc_2d, no_chans = fftw_normxcorr(
-            templates, stream, pads, threaded=True)
-        toc = time.time()
-        print('FFTW-threaded took: %f seconds' % (toc-tic))
-        tic = time.time()
-        time_ccc, no_chans = time_multi_normxcorr(templates, stream, pads)
-        toc = time.time()
-        print('Time-domain took: %f seconds' % (toc-tic))
-        self.assertTrue(np.allclose(numpy_ccc, fftw_ccc, atol=0.001))
-        self.assertTrue(np.allclose(numpy_ccc, fftw_ccc_2d, atol=0.001))
-        self.assertTrue(np.allclose(numpy_ccc, time_ccc, atol=0.001))
-        self.assertTrue(np.allclose(fftw_ccc, time_ccc, atol=0.001))
-
-    def test_multi_channel_xcorr(self):
-        chans = ['EHZ', 'EHN', 'EHE']
-        stas = ['COVA', 'FOZ', 'LARB', 'GOVA', 'MTFO', 'MTBA']
-        n_templates = 20
-        stream_len = 100000
-        template_len = 200
-        templates = []
-        stream = Stream()
+import eqcorrscan.utils.correlate as corr
+from eqcorrscan.utils.correlate import register_array_xcorr
+
+# set seed state for consistent arrays
+random = np.random.RandomState(7)
+
+
+# -------------------------- helper functions
+
+
+def gen_xcorr_func(name):
+    """ return an xcorr function with desired name """
+
+    def func(templates, stream, pads, *args, **kwargs):
+        pass
+
+    func.__name__ = str(name)
+    return func
+
+
+def time_func(func, name, *args, **kwargs):
+    """ call a func with args and kwargs, print name of func and how
+    long it took. """
+    tic = time.time()
+    out = func(*args, **kwargs)
+    toc = time.time()
+    print('%s took %0.2f seconds' % (name, toc - tic))
+    return out
+
+
+def measure_counts(self, func):
+    """ decorator counting how often func gets called """
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        self.counter[func.__name__] += 1
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+# generate streams for templates and search space
+
+chans = ['EHZ', 'EHN', 'EHE']
+stas = ['COVA', 'FOZ', 'LARB', 'GOVA', 'MTFO', 'MTBA']
+n_templates = 20
+stream_len = 100000
+template_len = 200
+
+
+def generate_multichannel_stream():
+    stream = Stream()
+    for station in stas:
+        for channel in chans:
+            stream += Trace(data=random.randn(stream_len))
+            stream[-1].stats.channel = channel
+            stream[-1].stats.station = station
+    return stream
+
+
+def generate_multichannel_templates():
+    templates = []
+    for i in range(n_templates):
+        template = Stream()
+        for station in stas:
+            for channel in chans:
-                stream += Trace(data=np.random.randn(stream_len))
-                stream[-1].stats.channel = channel
-                stream[-1].stats.station = station
-        for i in range(n_templates):
-            template = Stream()
-            for station in stas:
-                for channel in chans:
-                    template += Trace(data=np.random.randn(template_len))
-                    template[-1].stats.channel = channel
-                    template[-1].stats.station = station
-            templates.append(template)
-        # print("Running time serial")
-        # tic = time.time()
-        # cccsums_t_s, no_chans, chans = multichannel_normxcorr(
-        #     templates=templates, stream=stream, cores=None, time_domain=True)
-        # toc = time.time()
-        # print('Time-domain in serial took: %f seconds' % (toc-tic))
-        print("Running time parallel")
-        tic = time.time()
-        cccsums_t_p, no_chans, chans = multichannel_normxcorr(
-            templates=templates, stream=stream, cores=4, time_domain=True)
-        toc = time.time()
-        print('Time-domain in parallel took: %f seconds' % (toc-tic))
-        print("Running frequency serial")
-        tic = time.time()
-        cccsums_f_s, no_chans, chans = multichannel_normxcorr(
-            templates=templates, stream=stream, cores=None, time_domain=False)
-        toc = time.time()
-        print('Frequency-domain in serial took: %f seconds' % (toc-tic))
-        print("Running frequency parallel")
frequency parallel") - tic = time.time() - cccsums_f_p, no_chans, chans = multichannel_normxcorr( - templates=templates, stream=stream, cores=4, time_domain=False) - toc = time.time() - print('Frequency-domain in parallel took: %f seconds' % (toc-tic)) - print("Running frequency openmp parallel") - tic = time.time() - cccsums_f_op, no_chans, chans = multichannel_normxcorr( - templates=templates, stream=stream, cores=2, time_domain=False, - openmp=True) - toc = time.time() - print('Frequency-domain in parallel took: %f seconds' % (toc-tic)) - print("Finished") - # self.assertTrue(np.allclose(cccsums_t_s, cccsums_t_p, atol=0.00001)) - self.assertTrue(np.allclose(cccsums_f_s, cccsums_f_p, atol=0.00001)) - self.assertTrue(np.allclose(cccsums_f_s, cccsums_f_op, atol=0.00001)) - if not np.allclose(cccsums_t_p, cccsums_f_s, atol=0.00001): - import matplotlib.pyplot as plt - plt.plot(cccsums_t_p[0], 'k', label='Time') - plt.plot(cccsums_f_s[0], 'r', label='Frequency') - plt.legend() - plt.show() - self.assertTrue(np.allclose(cccsums_t_p, cccsums_f_s, atol=0.00001)) - - -if __name__ == '__main__': + template += Trace(data=random.randn(template_len)) + template[-1].stats.channel = channel + template[-1].stats.station = station + templates.append(template) + return templates + + +# ----------------------------- module fixtures + + +# auto run fixtures + +@pytest.fixture(scope='class', autouse=True) +def print_class_name(request): + """ prints the class name before a class test suite starts """ + try: + cls_name = request.cls.__name__ + except AttributeError: # test does not belong to a class + return + else: + dash = '-' * 70 + print('\nstarting tests contained in class %s\n%s' % (cls_name, dash)) + + +# array fixtures + +starting_index = 500 + + +@pytest.fixture(scope='module') +def array_template(): + """ + return a set of templates, generated with randomn, for correlation tests. + """ + return random.randn(200, 200) + + +@pytest.fixture(scope='module') +def array_stream(array_template): """ - Run core tests + Return a stream genearted with randomn for testing normxcorr functions """ - unittest.main() + stream = random.randn(10000) * 5 + # insert a template into the stream so cc == 1 at some place + ar = array_template[0] + stream[starting_index: starting_index + len(ar)] = ar + return stream + + +@pytest.fixture(scope='module') +def pads(array_template): + """ + return an array of zeros for padding, matching templates len. 
+ """ + return np.zeros(array_template.shape[0], dtype=int) + + +@pytest.fixture(scope='module') +def array_ccs(array_template, array_stream, pads): + """ use each function stored in the normcorr cache to correlate the + templates and arrays, return a dict with keys as func names and values + as the cc calculated by said function""" + out = {} + + for name in list(corr.XCORR_FUNCS_ORIGINAL.keys()): + func = corr.get_array_xcorr(name) + cc, _ = time_func(func, name, array_template, array_stream, pads) + out[name] = cc + return out + + +# stream fixtures + + +@pytest.fixture(scope='module') +def multichannel_templates(): + """ create multichannel templates """ + return generate_multichannel_templates() + + +@pytest.fixture(scope='module') +def multichannel_stream(): + """ create a multichannel stream for tests """ + return generate_multichannel_stream() + + +# a dict of all registered stream functions (this is a bit long) +stream_funcs = {fname + '_' + mname: corr.get_stream_xcorr(fname, mname) + for fname in corr.XCORR_FUNCS_ORIGINAL.keys() + for mname in corr.XCORR_STREAM_METHODS + if fname != 'default'} + + +@pytest.fixture(scope='module') +def stream_cc_output_dict(multichannel_templates, multichannel_stream): + """ return a dict of outputs from all stream_xcorr functions """ + # corr._get_array_dicts(multichannel_templates, multichannel_stream) + out = {} + for name, func in stream_funcs.items(): + cc_out = time_func(func, name, multichannel_templates, + multichannel_stream, cores=1) + out[name] = cc_out + return out + + +@pytest.fixture(scope='module') +def stream_cc_dict(stream_cc_output_dict): + """ return just the cc arrays from the stream_cc functions """ + return {name: result[0] for name, result in stream_cc_output_dict.items()} + + +# ----------------------------------- tests + + +class TestArrayCorrelateFunctions: + """ these tests ensure the various implementations of normxcorr return + approximately the same answers """ + atol = .00001 # how close correlations have to be + + # tests + def test_single_channel_similar(self, array_ccs): + """ ensure each of the correlation methods return similar answers + given the same input data """ + cc_list = list(array_ccs.values()) + for cc1, cc2 in itertools.combinations(cc_list, 2): + assert np.allclose(cc1, cc2, atol=self.atol) + + def test_test_autocorrelation(self, array_ccs): + """ ensure an auto correlationoccurred in each of ccs where it is + expected, defined by starting_index variable """ + for name, cc in array_ccs.items(): + assert np.isclose(cc[0, starting_index], 1., atol=self.atol) + + +@pytest.mark.serial +class TestStreamCorrelateFunctions: + """ same thing as TestArrayCorrelateFunction but for stream interface """ + atol = TestArrayCorrelateFunctions.atol + + def test_multi_channel_xcorr(self, stream_cc_dict): + """ test various correlation methods with multiple channels """ + # get correlation results into a list + cc_names = list(stream_cc_dict.keys()) + cc_list = [stream_cc_dict[cc_name] for cc_name in cc_names] + cc_1 = cc_list[0] + # loop over correlations and compare each with the first in the list + # this will ensure all cc are "close enough" + for cc_name, cc in zip(cc_names[2:], cc_list[2:]): + assert np.allclose(cc_1, cc, atol=self.atol) + + +class TestXcorrContextManager: + # fake_cache = copy.deepcopy(corr.XCOR_FUNCS) + + @pytest.fixture + def cache(self): + """ this fixtures resets the class level cache after every test """ + yield copy.deepcopy(corr.XCOR_FUNCS) + + @pytest.fixture + def set_xcorr(self, 
+        return corr._Context(cache, 'default')
+
+    @pytest.fixture
+    def set_value(self, set_xcorr):
+        """ set a value in a 'permanent' fashion, return the set function """
+
+        def func(templates, stream, pads):
+            pass
+
+        set_xcorr(func)
+        return func
+
+    # tests
+    def test_value_was_set(self, set_value, cache):
+        assert cache['default'] == set_value
+
+    def test_context_manager(self, cache):
+        """ ensure the context manager reverts changes """
+        context = corr._Context(cache, 'default')
+        old_default = cache['default']
+        new_val = corr.numpy_normxcorr
+        with context(new_val):
+            assert cache['default'] == new_val
+        assert cache['default'] == old_default
+
+    def test_str_accepted(self):
+        """ ensure a str of the xcorr function can be passed as well """
+        with corr.set_xcorr('numpy'):
+            func = corr.get_array_xcorr()
+            assert func is corr.numpy_normxcorr
+
+
+class TestGenericStreamXcorr:
+    """ tests for the stream_xcorr function """
+
+    # tests
+    def test_noargs_returns_default(self):
+        """ ensure passing no args to get_stream_xcorr returns default """
+        func = corr.get_stream_xcorr()
+        default = corr.XCOR_FUNCS['default'].stream_xcorr
+        assert func is default
+
+    def test_callable_registered(self, multichannel_templates,
+                                 multichannel_stream):
+        """ ensure a callable can be registered """
+        small_count = {}
+
+        def some_callable(template_array, stream_array, pad_array):
+            small_count['name'] = 1
+            return corr.numpy_normxcorr(template_array, stream_array,
+                                        pad_array)
+
+        func = corr.get_stream_xcorr(some_callable)
+        func(multichannel_templates, multichannel_stream)
+        assert 'name' in small_count
+
+    def test_bad_concurrency_raises(self):
+        """ ensure passing an invalid concurrency argument raises a
+        ValueError """
+        with pytest.raises(ValueError):
+            corr.get_stream_xcorr(concurrency='node.js')
+
+    def test_loading_unregistered_function_registers(self):
+        """ ensure if a function in the cache hasn't been decorated it gets
+        decorated when returned """
+
+        def func(templates, streams, pads):
+            pass
+
+        corr.XCOR_FUNCS['func_test'] = func
+        corr.get_stream_xcorr('func_test')
+        assert hasattr(corr.XCOR_FUNCS['func_test'], 'registered')
+
+    def test_using_custom_function_doesnt_change_default(self):
+        """ ensure a custom function will not change the default """
+
+        def func(templates, streams, pads):
+            pass
+
+        default = corr.get_array_xcorr(None)
+
+        corr.get_array_xcorr(func)
+
+        assert corr.get_array_xcorr(None) is default
+
+
+class TestRegisterNormXcorrs:
+    """ Tests for the register_array_xcorr function, which holds global
+    context for which xcorr to use """
+
+    # helper functions
+    def name_func_is_registered(self, func_name):
+        """ return True if func is registered as a normxcorr func """
+        # Note: do not remove this helper or bad things will happen
+        name = func_name.__name__ if callable(func_name) else func_name
+        return name in corr.XCOR_FUNCS
+
+    # fixtures
+    @pytest.fixture(scope='class', autouse=True)
+    def swap_registry(self):
+        """ copy the current registry, restore it when tests finish """
+        current = copy.deepcopy(corr.XCOR_FUNCS)
+        yield
+        corr.XCOR_FUNCS = current
+
+    # tests
+    def test_register_as_decorator_no_args(self):
+        """ ensure register_array_xcorr works as a decorator with no args """
+
+        @register_array_xcorr
+        def func1(templates, stream, pads, *args, **kwargs):
+            pass
+
+        assert self.name_func_is_registered(func1)
+
+    def test_register_as_decorator_with_args(self):
+        """ ensure register can be used as a decorator with args """
+
+        @register_array_xcorr(name='func2')
+        def func(templates, stream, pads, *args, **kwargs):
+            pass
+
+        assert self.name_func_is_registered('func2')
+
+    def test_register_as_callable(self):
+        """ ensure register can be used as a callable taking a name
+        and a normxcorr func """
+        func = gen_xcorr_func('funky')
+        register_array_xcorr(name='func3', func=func)
+        assert self.name_func_is_registered('func3')
+
+    def test_set_default(self):
+        """ ensure the default can be overwritten """
+        func = gen_xcorr_func('funky')
+        corr.register_array_xcorr(func, is_default=True)
+        assert corr.XCOR_FUNCS['default'] is func
+
+    def test_register_bad_func_raises(self):
+        """ test trying to register a non-supported method name raises """
+        func = corr.XCOR_FUNCS['default']
+
+        with pytest.raises(ValueError):
+            @func.register('not_supported_value')
+            def func():
+                pass
+
+
+class TestRegisterAlternativeConcurrency:
+    """ Tests for registering alternative concurrency functions """
+    counter = defaultdict(lambda: 0)
+
+    # helper functions
+    def new_multi(self, template_dict, stream_dict, pad_dict, seed_ids):
+        pass
+
+    # fixtures
+    @pytest.fixture
+    def r_normxcorr(self):
+        """ return a registered normxcorr function """
+        return register_array_xcorr(gen_xcorr_func('normxcorr'))
+
+    @pytest.fixture
+    def normxcorr_new_multithread(self, r_normxcorr):
+        """ register the new multithread method """
+        func = measure_counts(self, self.new_multi)
+        r_normxcorr.register('multithread')(func)
+        r_normxcorr.multithread(None, None, None, None)
+        yield func.__name__
+        self.counter.pop(func.__name__)
+
+    # tests
+    def test_new_method_was_called(self, normxcorr_new_multithread):
+        """ ensure the new method was called """
+        assert self.counter[normxcorr_new_multithread]
diff --git a/eqcorrscan/tests/mag_calc_test.py b/eqcorrscan/tests/mag_calc_test.py
index 5980fbd73..90acda73e 100644
--- a/eqcorrscan/tests/mag_calc_test.py
+++ b/eqcorrscan/tests/mag_calc_test.py
@@ -7,6 +7,7 @@
 from __future__ import unicode_literals
 
 import unittest
+import pytest
 import numpy as np
 import os
 import datetime as dt
@@ -44,16 +45,16 @@ def test_dist_calc(self):
         self.assertEqual(round(dist_calc((90, 90, 0), (90, 89, 0))), 0)
         self.assertEqual(round(dist_calc((90, 90, 0), (89, 90, 0))), 111)
 
+    @pytest.mark.network
+    @pytest.mark.flaky(reruns=2)
     def test_sim_WA(self):
         """Test feeding both PAZ and seedresp."""
         t1 = UTCDateTime("2010-09-3T16:30:00.000")
         t2 = UTCDateTime("2010-09-3T17:00:00.000")
         fdsn_client = Client('IRIS')
-        st = fdsn_client.get_waveforms(network='NZ', station='BFZ',
-                                       location='10',
-                                       channel='HHZ',
-                                       starttime=t1, endtime=t2,
-                                       attach_response=True)
+        st = fdsn_client.get_waveforms(
+            network='NZ', station='BFZ', location='10', channel='HHZ',
+            starttime=t1, endtime=t2, attach_response=True)
         tr = st[0]
         PAZ = {'poles': [-4.440 + 4.440j, -4.440 - 4.440j, -1.083 + 0.0j],
                'zeros': [0.0 + 0.0j, 0.0 + 0.0j, 0.0 + 0.0],
@@ -75,14 +76,10 @@ def test_sim_WA(self):
                                  filename=respf)
         date = t1
 
-        seedresp = {'filename': respf,  # RESP filename
-                    'date': date,
-                    'network': tr.stats.network,
-                    'station': tr.stats.station,
-                    'channel': tr.stats.channel,
-                    'location': tr.stats.location,
-                    # Units to return response in ('DIS', 'VEL' or ACC)
-                    'units': 'DIS'
+        seedresp = {
+            'filename': respf, 'date': date, 'network': tr.stats.network,
+            'station': tr.stats.station, 'channel': tr.stats.channel,
+            'location': tr.stats.location, 'units': 'DIS'
                     }
         _sim_WA(trace=tr, PAZ=None, seedresp=seedresp, water_level=10)
 
@@ -189,10 +186,10 @@ def test_amp_pick_sfile(self):
                              '01-0411-15L.S201309')
        datapath = 
os.path.join(testing_path, 'WAV', 'TEST_') respdir = testing_path - event = amp_pick_sfile(sfile=sfile, datapath=datapath, - respdir=respdir, chans=['Z'], var_wintype=True, - winlen=0.9, pre_pick=0.2, pre_filt=True, - lowcut=1.0, highcut=20.0, corners=4) + event = amp_pick_sfile( + sfile=sfile, datapath=datapath, respdir=respdir, chans=['Z'], + var_wintype=True, winlen=0.9, pre_pick=0.2, pre_filt=True, + lowcut=1.0, highcut=20.0, corners=4) self.assertTrue(isinstance(event, Event)) self.assertTrue(os.path.isfile('mag_calc.out')) os.remove('mag_calc.out') @@ -339,5 +336,6 @@ def test_amp_pick_no_prefilt(self): var_wintype=False, pre_filt=False) self.assertEqual(len(picked_event.amplitudes), 1) + if __name__ == '__main__': unittest.main() diff --git a/eqcorrscan/tests/match_filter_test.py b/eqcorrscan/tests/match_filter_test.py index a2e513366..d7d58a32c 100644 --- a/eqcorrscan/tests/match_filter_test.py +++ b/eqcorrscan/tests/match_filter_test.py @@ -9,6 +9,7 @@ import copy import os import unittest +import pytest import numpy as np from obspy import read, UTCDateTime, read_events, Catalog, Stream, Trace @@ -96,30 +97,8 @@ def test_spike_test(self): _spike_test(stream) +@pytest.mark.serial class TestSynthData(unittest.TestCase): - def test_debug_range(self): - """Test range of debug outputs""" - # debug == 3 fails on travis due to plotting restrictions. - for debug in range(0, 3): - print('Testing for debug level=%s' % debug) - try: - kfalse, ktrue = test_match_filter(debug=debug) - except RuntimeError: - print('Error plotting, missing test') - continue - if ktrue > 0: - self.assertTrue(kfalse / ktrue < 0.25) - else: - # Randomised data occasionally yields 0 detections - kfalse, ktrue = test_match_filter(debug=debug) - self.assertTrue(kfalse / ktrue < 0.25) - if os.path.isfile('cccsum_0.npy'): - os.remove('cccsum_0.npy') - if os.path.isfile('cccsum_1.npy'): - os.remove('cccsum_1.npy') - if os.path.isfile('peaks_1970-01-01.pdf'): - os.remove('peaks_1970-01-01.pdf') - def test_threshold_methods(self): # Test other threshold methods for threshold_type, threshold in [('absolute', 2), @@ -156,8 +135,10 @@ def test_onesamp_diff(self): plotvar=False) +@pytest.mark.network class TestGeoNetCase(unittest.TestCase): @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): client = Client('GEONET') cls.t1 = UTCDateTime(2016, 9, 4) @@ -250,6 +231,7 @@ def test_no_matching_data(self): threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1) + @pytest.mark.flaky(reruns=2) def test_geonet_tribe_detect(self): client = Client('GEONET') # Try to force issues with starting samples on wrong day for geonet @@ -266,8 +248,10 @@ def test_geonet_tribe_detect(self): self.assertEqual(len(party), 16) +@pytest.mark.network class TestNCEDCCases(unittest.TestCase): @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): print('\t\t\t Downloading data') client = Client('NCEDC') @@ -521,8 +505,10 @@ def test_tribe_copy(self): self.assertEqual(tribe, copied) +@pytest.mark.network class TestMatchObjects(unittest.TestCase): @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): print('\t\t\t Downloading data') client = Client('NCEDC') @@ -629,6 +615,7 @@ def test_tribe_detect(self): det.__dict__[key], check_det.__dict__[key]) # self.assertEqual(fam.template, check_fam.template) + @pytest.mark.flaky(reruns=2) def test_client_detect(self): """Test the client_detect method.""" client = Client('NCEDC') diff --git a/eqcorrscan/tests/pre_processing_test.py 
b/eqcorrscan/tests/pre_processing_test.py index d3b980833..b9caf1d5e 100644 --- a/eqcorrscan/tests/pre_processing_test.py +++ b/eqcorrscan/tests/pre_processing_test.py @@ -19,9 +19,9 @@ class TestPreProcessing(unittest.TestCase): @classmethod def setUpClass(cls): - testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), - 'test_data', 'day_vols', 'Y2012', - 'R086.01', '*') + testing_path = os.path.join( + os.path.abspath(os.path.dirname(__file__)), 'test_data', + 'day_vols', 'Y2012', 'R086.01', '*') cls.st = read(testing_path) cls.day_start = cls.st[0].stats.starttime.date cls.short_stream = cls.st.copy().trim(cls.st[0].stats.starttime, diff --git a/eqcorrscan/tests/sfile_util_test.py b/eqcorrscan/tests/sfile_util_test.py index cd34e57f2..6d958e1d7 100644 --- a/eqcorrscan/tests/sfile_util_test.py +++ b/eqcorrscan/tests/sfile_util_test.py @@ -7,6 +7,7 @@ from __future__ import unicode_literals import unittest +import pytest from eqcorrscan.utils.sfile_util import eventtosfile, readwavename, readpicks from eqcorrscan.utils.sfile_util import _nortoevmag, _evmagtonor, nordpick @@ -15,6 +16,8 @@ class TestSfileMethods(unittest.TestCase): + @pytest.mark.network + @pytest.mark.flaky(reruns=2) def test_download_write(self): """ Function to download quakeML files from a range of datacenters and \ @@ -328,7 +331,6 @@ def test_read_empty_header(self): by Dominic Evanzia. """ import os - import numpy as np testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'test_data') test_event = readpicks(os.path.join(testing_path, 'Sfile_no_header')) @@ -391,6 +393,8 @@ def test_read_wavename(self): wavefiles = readwavename(testing_path) self.assertEqual(len(wavefiles), 1) + @pytest.mark.network + @pytest.mark.flaky(reruns=2) def test_station_to_seisan(self): from obspy.clients.fdsn import Client from obspy import UTCDateTime diff --git a/eqcorrscan/tests/subspace_test.py b/eqcorrscan/tests/subspace_test.py index c74b2015a..250e28228 100644 --- a/eqcorrscan/tests/subspace_test.py +++ b/eqcorrscan/tests/subspace_test.py @@ -9,6 +9,7 @@ import numpy as np import unittest +import pytest import os import copy @@ -96,11 +97,13 @@ def test_stat(self): self.assertEqual((stat.max().round(6) - 0.229755).round(6), 0) +@pytest.mark.network class SubspaceTestingMethods(unittest.TestCase): """ Main tests for the subspace module. """ @classmethod + @pytest.mark.flaky(reruns=2) def setUpClass(cls): """Set up the test templates.""" cls.templates, cls.st = get_test_data() diff --git a/eqcorrscan/tests/template_gen_test.py b/eqcorrscan/tests/template_gen_test.py index 7cabe9655..30c451007 100644 --- a/eqcorrscan/tests/template_gen_test.py +++ b/eqcorrscan/tests/template_gen_test.py @@ -7,6 +7,7 @@ from __future__ import unicode_literals import unittest +import pytest import glob import os import numpy as np @@ -16,7 +17,6 @@ import copy from obspy import read, UTCDateTime, read_events, Stream -from obspy.clients.fdsn.header import FDSNException from obspy.clients.fdsn import Client from obspy.core.event import Catalog, Event, Origin, Pick, WaveformStreamID @@ -54,6 +54,8 @@ def test_sac_template_gen(self): for tr in template: self.assertEqual(len(tr.data), length * samp_rate) + @pytest.mark.network + @pytest.mark.flaky(reruns=2) def test_tutorial_template_gen(self): """Test template generation from tutorial, uses from_client method. 
@@ -61,11 +63,7 @@ def test_tutorial_template_gen(self): """ testing_path = os.path.join( os.path.abspath(os.path.dirname(__file__)), 'test_data') - try: - mktemplates(plot=False) - except FDSNException: - warnings.warn('FDSN error, is server down?') - return + mktemplates(plot=False) for template_no in range(4): template = read('tutorial_template_' + str(template_no) + '.ms') expected_template = read(os.path.join( @@ -78,19 +76,20 @@ def test_tutorial_template_gen(self): del(template) os.remove('tutorial_template_' + str(template_no) + '.ms') + @pytest.mark.network + @pytest.mark.flaky(reruns=2) def test_not_delayed(self): """Test the method of template_gen without applying delays to channels.""" - cat = get_geonet_events(minlat=-40.98, maxlat=-40.85, minlon=175.4, - maxlon=175.5, - startdate=UTCDateTime(2016, 5, 1), - enddate=UTCDateTime(2016, 5, 2)) + cat = get_geonet_events( + minlat=-40.98, maxlat=-40.85, minlon=175.4, maxlon=175.5, + startdate=UTCDateTime(2016, 5, 1), enddate=UTCDateTime(2016, 5, 2)) cat = filter_picks(catalog=cat, top_n_picks=5) - template = from_client(catalog=cat, client_id='GEONET', - lowcut=None, highcut=None, samp_rate=100.0, - filt_order=4, length=10.0, prepick=0.5, - swin='all', process_len=3600, - debug=0, plot=False, delayed=False)[0] + template = from_client( + catalog=cat, client_id='GEONET', lowcut=None, highcut=None, + samp_rate=100.0, filt_order=4, length=10.0, prepick=0.5, + swin='all', process_len=3600, debug=0, plot=False, + delayed=False)[0] for tr in template: tr.stats.starttime.precision = 6 starttime = template[0].stats.starttime @@ -101,6 +100,8 @@ def test_not_delayed(self): tr.stats.delta) self.assertEqual(tr.stats.npts, length) + @pytest.mark.network + @pytest.mark.flaky(reruns=2) def test_download_various_methods(self): """ Will download data from server and store in various databases, @@ -109,8 +110,8 @@ def test_download_various_methods(self): client = Client('GEONET') # get the events catalog = Catalog() - data_stream = client._download('http://quakeml.geonet.org.nz/' + - 'quakeml/1.2/2016p008194') + data_stream = client._download( + 'http://quakeml.geonet.org.nz/quakeml/1.2/2016p008194') data_stream.seek(0, 0) catalog += read_events(data_stream, format="quakeml") data_stream.close() @@ -253,16 +254,16 @@ def test_upsample_error(self): class TestEdgeGen(unittest.TestCase): @classmethod def setUpClass(cls): - cls.testing_path = os.path.dirname(os.path.abspath(inspect.getfile( - inspect.currentframe()))) - cls.st = read(os.path.join(cls.testing_path, 'test_data', - 'WAV', 'TEST_', - '2013-09-15-0930-28.DFDPC_027_00')) + cls.testing_path = os.path.dirname( + os.path.abspath(inspect.getfile(inspect.currentframe()))) + cls.st = read(os.path.join( + cls.testing_path, 'test_data', 'WAV', 'TEST_', + '2013-09-15-0930-28.DFDPC_027_00')) for tr in cls.st: tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1] - event = read_event(os.path.join(cls.testing_path, 'test_data', - 'REA', 'TEST_', - '15-0931-08L.S201309')) + event = read_event(os.path.join( + cls.testing_path, 'test_data', 'REA', 'TEST_', + '15-0931-08L.S201309')) cls.picks = event.picks def test_undefined_phase_type(self): @@ -310,10 +311,9 @@ def test_extract_from_stack_and_process(self): length = 3 stack = self.st.copy() template = template_gen(self.picks, self.st.copy(), 2) - extracted = extract_from_stack(stack, template, length=length, - pre_pick=0.3, pre_pad=45, - pre_processed=False, samp_rate=20, - lowcut=2, highcut=8) + extracted = extract_from_stack( + stack, template, 
length=length, pre_pick=0.3, pre_pad=45, + pre_processed=False, samp_rate=20, lowcut=2, highcut=8) self.assertEqual(len(template), len(extracted)) for tr in extracted: self.assertEqual(tr.stats.endtime - tr.stats.starttime, @@ -323,9 +323,9 @@ def test_extract_from_stack_including_z(self): length = 3 stack = self.st.copy() template = template_gen(self.picks, self.st.copy(), 2) - extracted = extract_from_stack(stack, template, length=length, - pre_pick=0.3, pre_pad=45, - Z_include=True) + extracted = extract_from_stack( + stack, template, length=length, pre_pick=0.3, pre_pad=45, + Z_include=True) self.assertLess(len(template), len(extracted)) for tr in extracted: self.assertEqual(tr.stats.endtime - tr.stats.starttime, diff --git a/eqcorrscan/tests/tutorials_test.py b/eqcorrscan/tests/tutorials_test.py index 230cd637c..629181b89 100644 --- a/eqcorrscan/tests/tutorials_test.py +++ b/eqcorrscan/tests/tutorials_test.py @@ -31,6 +31,7 @@ def setUpClass(cls): os.path.abspath(os.path.dirname(__file__)), 'test_data') @slow + @pytest.mark.flaky(reruns=2) def test_templates_and_match(self): """Call the template creation then the matched-filter tests.""" mktemplates(plot=False) @@ -74,6 +75,7 @@ def test_templates_and_match(self): os.remove('tutorial_template_' + str(template_no) + '.ms') @slow + @pytest.mark.flaky(reruns=2) def test_lag_calc(self): """Test the lag calculation tutorial.""" shift_len = 0.2 @@ -102,6 +104,7 @@ def test_lag_calc(self): self.assertTrue(abs(re_picked_delay) < shift_len) @slow + @pytest.mark.flaky(reruns=2) def test_subspace(self): """Test the subspace tutorial.""" detections = subspace.run_tutorial(plot=False) diff --git a/eqcorrscan/utils/__init__.py b/eqcorrscan/utils/__init__.py index 3f35af8bf..a3b0715c4 100644 --- a/eqcorrscan/utils/__init__.py +++ b/eqcorrscan/utils/__init__.py @@ -12,8 +12,8 @@ __all__ = ['archive_read', 'catalog_to_dd', 'catalog_utils', 'clustering', - 'correlate', 'despike', 'findpeaks', 'mag_calc', 'normalise', - 'parameters', 'picker', 'plotting', 'pre_processing', + 'correlate', 'debug_log', 'despike', 'findpeaks', 'mag_calc', + 'normalise', 'parameters', 'picker', 'plotting', 'pre_processing', 'sac_util', 'seismo_logs', 'sfile_util', 'stacking', 'synth_seis', 'timer', 'trigger'] diff --git a/eqcorrscan/utils/archive_read.py b/eqcorrscan/utils/archive_read.py index e22df1890..36919387c 100644 --- a/eqcorrscan/utils/archive_read.py +++ b/eqcorrscan/utils/archive_read.py @@ -30,7 +30,7 @@ def read_data(archive, arc_type, day, stachans, length=86400): client. If arc_type is day_vols, then this is the path to the top directory. 
 :type arc_type: str
-    :param arc_type: The type of archive, can be: seishub, FDSN, day_volves
+    :param arc_type: The type of archive, can be: seishub, FDSN, day_volumes
     :type day: datetime.date
     :param day: Date to retrieve data for
    :type stachans: list
@@ -55,27 +55,24 @@ def read_data(archive, arc_type, day, stachans, length=86400):
     >>> from eqcorrscan.utils.archive_read import read_data
     >>> from obspy import UTCDateTime
     >>> t1 = UTCDateTime(2012, 3, 26)
-    >>> stachans = [('FOZ', 'HHZ'), ('JCZ', 'HHZ')]
-    >>> st = read_data('GEONET', 'FDSN', t1, stachans)
+    >>> stachans = [('JCNB', 'SP1')]
+    >>> st = read_data('NCEDC', 'FDSN', t1, stachans)
     >>> print(st)
-    2 Trace(s) in Stream:
-    NZ.FOZ.10.HHZ | 2012-03-25T23:59:57.018393Z - 2012-03-27T00:00:00.688393Z \
-| 100.0 Hz, 8640368 samples
-    NZ.JCZ.10.HHZ | 2012-03-25T23:59:57.348391Z - 2012-03-27T00:00:02.958391Z \
-| 100.0 Hz, 8640562 samples
-
+    1 Trace(s) in Stream:
+    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
+950000Z | 20.0 Hz, 1728000 samples

     .. rubric:: Example, missing data

     >>> from eqcorrscan.utils.archive_read import read_data
     >>> from obspy import UTCDateTime
     >>> t1 = UTCDateTime(2012, 3, 26)
-    >>> stachans = [('FOZ', 'HHZ'), ('GCSZ', 'HHZ')]
-    >>> st = read_data('GEONET', 'FDSN', t1, stachans)
+    >>> stachans = [('JCNB', 'SP1'), ('GCSZ', 'HHZ')]
+    >>> st = read_data('NCEDC', 'FDSN', t1, stachans)
     >>> print(st)
     1 Trace(s) in Stream:
-    NZ.FOZ.10.HHZ | 2012-03-25T23:59:57.018393Z - 2012-03-27T00:00:00.688393Z \
-| 100.0 Hz, 8640368 samples
+    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
+950000Z | 20.0 Hz, 1728000 samples

     .. rubric:: Example, local day-volumes
diff --git a/eqcorrscan/utils/clustering.py b/eqcorrscan/utils/clustering.py
index 5475f73dc..8c3ef24bd 100644
--- a/eqcorrscan/utils/clustering.py
+++ b/eqcorrscan/utils/clustering.py
@@ -13,24 +13,25 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
-import numpy as np
-import warnings
-import matplotlib.pyplot as plt
 import os
-
+import warnings
 from multiprocessing import Pool, cpu_count
-from scipy.spatial.distance import squareform
-from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
-from obspy.signal.cross_correlation import xcorr
+
+import matplotlib.pyplot as plt
+import numpy as np
 from obspy import Stream, Catalog, UTCDateTime, Trace
+from obspy.signal.cross_correlation import xcorr
+from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
+from scipy.spatial.distance import squareform
 
-from eqcorrscan.utils.mag_calc import dist_calc
-from eqcorrscan.utils.correlate import time_multi_normxcorr
 from eqcorrscan.utils import stacking
 from eqcorrscan.utils.archive_read import read_data
+from eqcorrscan.utils.correlate import get_array_xcorr
+from eqcorrscan.utils.mag_calc import dist_calc
 
 
-def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0):
+def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0,
+                         xcorr_func='time_domain'):
     """
     Calculate cross-channel coherency.
 
@@ -49,6 +50,11 @@ def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0):
     :param shift_len: Samples to shift, only used if `allow_shift=True`
     :type i: int
     :param i: index used for parallel async processing, returned unaltered
+    :type xcorr_func: str, callable
+    :param xcorr_func:
+        The method for performing correlations. Accepts either a string or
+        callable.
See :func:`eqcorrscan.utils.correlate.register_array_xcorr` + for more details :returns: cross channel coherence, float - normalized by number of channels, @@ -57,6 +63,7 @@ def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0): """ cccoh = 0.0 kchan = 0 + array_xcorr = get_array_xcorr(xcorr_func) for tr in st1: tr2 = st2.select(station=tr.stats.station, channel=tr.stats.channel) @@ -71,7 +78,7 @@ def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0): kchan += 1 elif len(tr2) > 0: min_len = min(len(tr.data), len(tr2[0].data)) - cccoh += time_multi_normxcorr( + cccoh += array_xcorr( np.array([tr.data[0:min_len]]), tr2[0].data[0:min_len], [0])[0][0][0] kchan += 1 @@ -297,7 +304,7 @@ def group_delays(stream_list): if chan in group_chans[j]: shared_chans.append(chan) shared_delays_slave.append(delays[k]) - shared_delays_master.\ + shared_delays_master. \ append(group_delays[j][group_chans[j].index(chan)]) # Normalize master and slave delay times shared_delays_slave = [delay - min(shared_delays_slave) @@ -382,7 +389,7 @@ def svd(stream_list, full=False): channel=stachan[1]) if chan: if len(chan[0].data) > min_length: - if abs(len(chan[0].data) - min_length) > 0.1 *\ + if abs(len(chan[0].data) - min_length) > 0.1 * \ chan[0].stats.sampling_rate: raise IndexError('More than 0.1 s length ' 'difference, align and fix') @@ -402,7 +409,7 @@ def svd(stream_list, full=False): svalues.append(s) svectors.append(v) uvectors.append(u) - del(chan_mat) + del (chan_mat) return uvectors, svalues, svectors, stachans @@ -448,12 +455,12 @@ def empirical_svd(stream_list, linear=True): tr = st.select(station=stachan[0], channel=stachan[1])[0] if len(tr.data) > min_length: - if abs(len(tr.data) - min_length) > (0.1 * - tr.stats.sampling_rate): - raise IndexError('More than 0.1 s length ' - 'difference, align and fix') - warnings.warn(str(tr) + ' is not the same length as others, ' + - 'trimming the end') + sr = tr.stats.sampling_rate + if abs(len(tr.data) - min_length) > (0.1 * sr): + msg = 'More than 0.1 s length difference, align and fix' + raise IndexError(msg) + msg = ' is not the same length as others, trimming the end' + warnings.warn(str(tr) + msg) tr.data = tr.data[0:min_length] if linear: first_subspace = stacking.linstack(stream_list) @@ -462,8 +469,9 @@ def empirical_svd(stream_list, linear=True): second_subspace = first_subspace.copy() for i in range(len(second_subspace)): second_subspace[i].data = np.diff(second_subspace[i].data) - second_subspace[i].stats.starttime += 0.5 * \ - second_subspace[i].stats.delta + delta = second_subspace[i].stats.delta + second_subspace[i].stats.starttime += 0.5 * delta + return [first_subspace, second_subspace] @@ -542,8 +550,9 @@ def corr_cluster(trace_list, thresh=0.9): stack = stacking.linstack([Stream(tr) for tr in trace_list])[0] output = np.array([False] * len(trace_list)) group1 = [] + array_xcorr = get_array_xcorr() for i, tr in enumerate(trace_list): - if time_multi_normxcorr( + if array_xcorr( np.array([tr.data]), stack.data, [0])[0][0][0] > 0.6: output[i] = True group1.append(tr) @@ -553,7 +562,7 @@ def corr_cluster(trace_list, thresh=0.9): stack = stacking.linstack([Stream(tr) for tr in group1])[0] group2 = [] for i, tr in enumerate(trace_list): - if time_multi_normxcorr( + if array_xcorr( np.array([tr.data]), stack.data, [0])[0][0][0] > thresh: group2.append(tr) output[i] = True @@ -750,10 +759,9 @@ def extract_detections(detections, templates, archive, arc_type, detection.detect_time.strftime('%Y/%m/%d %H:%M:%S')) 
             detect_wav = st.copy()
             for tr in detect_wav:
-                tr.trim(starttime=UTCDateTime(detection.detect_time) -
-                        extract_len / 2,
-                        endtime=UTCDateTime(detection.detect_time) +
-                        extract_len / 2)
+                t1 = UTCDateTime(detection.detect_time) - extract_len / 2
+                t2 = UTCDateTime(detection.detect_time) + extract_len / 2
+                tr.trim(starttime=t1, endtime=t2)
         if outdir:
             if not os.path.isdir(os.path.join(outdir,
                                               detection.template_name)):
@@ -766,7 +774,7 @@ def extract_detections(detections, templates, archive, arc_type,
                 print('Written file: %s' % '/'.join([outdir,
                       detection.template_name, detection.detect_time.
-                      strftime('%Y-%m-%d_%H-%M-%S') + '.ms']))
+                                                     strftime('%Y-%m-%d_%H-%M-%S') + '.ms']))
         if not outdir:
             detection_wavefiles.append(detect_wav)
         del detect_wav
@@ -987,9 +995,11 @@ def re_thresh_csv(path, old_thresh, new_thresh, chan_thresh):
     detections_out = 0
     for detection in old_detections:
         detections_in += 1
-        if abs(detection.detect_val) >=\
-           (new_thresh / old_thresh) * detection.threshold and\
-           detection.no_chans >= chan_thresh:
+        required_thresh = (new_thresh / old_thresh) * detection.threshold
+        con1 = abs(detection.detect_val) >= required_thresh
+        con2 = detection.no_chans >= chan_thresh
+        if all([con1, con2]):
             detections_out += 1
             detections.append(detection)
     print('Read in %i detections' % detections_in)
@@ -999,4 +1009,5 @@ def re_thresh_csv(path, old_thresh, new_thresh, chan_thresh):
 if __name__ == "__main__":
     import doctest
+
     doctest.testmod()
diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py
index 07c9c1301..fcc3f908e 100644
--- a/eqcorrscan/utils/correlate.py
+++ b/eqcorrscan/utils/correlate.py
@@ -5,7 +5,7 @@
 routine using FFTW, a Numpy fft routine which uses bottleneck for
 normalisation and a compiled time-domain routine. These have varying levels of
 efficiency, both in terms of overall speed, and in memory usage. The time-domain is the
-most memory efficient but slowest routine (although fastest for small cases of
+most memory efficient but slowest routine (although fast for small cases of
 less than a few hundred correlations), the Numpy routine is fast, but memory
 inefficient due to a need to store large double-precision arrays for
 normalisation. The fftw compiled routine is fastest and more memory efficient
@@ -24,18 +24,292 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
-import numpy as np
+import contextlib
+import copy
 import ctypes
+import os
+from multiprocessing import Pool as ProcessPool, cpu_count
+from multiprocessing.pool import ThreadPool
+
+import numpy as np
 from future.utils import native_str
-from multiprocessing import Pool
-from scipy.fftpack.helper import next_fast_len
 
 from eqcorrscan.utils.libnames import _load_cdll
 
+# This is for building docs on readthedocs, which has an old version of
+# scipy - without this, this module cannot be imported, which breaks the docs.
+# Instead we define a dummy function that returns what it is given.
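+# (next_fast_len only rounds FFT lengths up to fast, 5-smooth sizes, so the
+# identity fallback gives identical results and merely costs some speed.)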
+READ_THE_DOCS = os.environ.get('READTHEDOCS', None) == 'True'
+if not READ_THE_DOCS:
+    from scipy.fftpack.helper import next_fast_len
+else:
+    def next_fast_len(a):
+        return a
+
+XCOR_FUNCS = {}  # cache of functions for doing cross correlations
+
+# methods added to each registered xcorr func
+# these implement the stream interface
+XCORR_STREAM_METHODS = ('multithread', 'multiprocess', 'concurrent',
+                        'stream_xcorr')
+
+# these implement the array interface
+XCOR_ARRAY_METHODS = ('array_xcorr',)
+
+
+# ------------------ Context manager for switching out default
+class _Context:
+    """ class for permanently or temporarily changing items in a dict """
+
+    def __init__(self, cache, value_to_switch):
+        """
+        :type cache: dict
+        :param cache: A dict to store values in
+        :type value_to_switch: str
+        :param value_to_switch:
+            The key in cache to switch based on different contexts
+        """
+        self.cache = cache
+        self.value_to_switch = value_to_switch
+        self.previous_value = None
+
+    def __call__(self, new_value, *args, **kwargs):
+        """
+        Set a new value for the default xcorr function.
+
+        This function can be called directly to permanently change the
+        default normxcorr function or it may be used as a context manager
+        to only modify it temporarily.
+
+        :param new_value: The new default xcorr function, or its registered
+            name
+        :return: this _Context instance, so the call doubles as a context
+            manager
+        """
+        self.previous_value = self.cache.get(self.value_to_switch)
+        self.cache[self.value_to_switch] = get_array_xcorr(new_value)
+        return self
+
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.revert()
+
+    def __repr__(self):
+        """ this hides the fact that _Context instances are returned after
+        calls """
+        name = self.cache[self.value_to_switch].__str__()
+        if hasattr(self.cache[self.value_to_switch], '__name__'):
+            name = self.cache[self.value_to_switch].__name__
+        out_str = ("%s changed to %s" % (self.value_to_switch, name))
+        return out_str
+
+    def revert(self):
+        """ revert the default xcorr function to the previous value """
+        new_value = self.previous_value
+        self(new_value)
+
+
+set_xcorr = _Context(XCOR_FUNCS, 'default')
+
+
+# ---------------------- generic concurrency functions
+
+@contextlib.contextmanager
+def _pool_boy(Pool, traces, **kwargs):
+    """
+    A context manager for handling the setup and cleanup of a pool object.
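+
+    Intended usage is, e.g.,
+    ``with _pool_boy(ThreadPool, len(stream), cores=4) as pool: ...``,
+    which closes and joins the pool once the block completes.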
+ + :param Pool: any Class (not instance) that implements the multiprocessing + Pool interface + :param traces: The number of traces to process + :type traces: int + """ + # All parallel processing happens on a per-trace basis, we shouldn't create + # more workers than there are traces + n_cores = kwargs.get('cores', cpu_count()) + if n_cores > traces: + n_cores = traces + pool = Pool(n_cores) + yield pool + pool.close() + pool.join() + + +def _pool_normxcorr(templates, stream, pool, func, *args, **kwargs): + chans = [[] for _i in range(len(templates))] + array_dict_tuple = _get_array_dicts(templates, stream) + stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple + # get parameter iterator + params = ((template_dict[sid], stream_dict[sid], pad_dict[sid]) + for sid in seed_ids) + # get cc results and used chans into their own lists + results = [pool.apply_async(func, param) for param in params] + xcorrs, tr_chans = zip(*(res.get() for res in results)) + cccsums = np.sum(xcorrs, axis=0) + no_chans = np.sum(np.array(tr_chans).astype(np.int), axis=0) + for seed_id, tr_chan in zip(seed_ids, tr_chans): + for chan, state in zip(chans, tr_chan): + if state: + chan.append((seed_id.split('.')[1], + seed_id.split('.')[-1].split('_')[0])) + return cccsums, no_chans, chans + + +def _general_multithread(func): + """ return the general multithreading function using func """ + + def multithread(templates, stream, *args, **kwargs): + with _pool_boy(ThreadPool, len(stream), **kwargs) as pool: + return _pool_normxcorr(templates, stream, pool=pool, func=func) + + return multithread + + +def _general_multiprocess(func): + def multiproc(templates, stream, *args, **kwargs): + with _pool_boy(ProcessPool, len(stream), **kwargs) as pool: + return _pool_normxcorr(templates, stream, pool=pool, func=func) -def numpy_normxcorr(templates, stream, pads): + return multiproc + + +def _general_serial(func): + def stream_xcorr(templates, stream, *args, **kwargs): + no_chans = np.zeros(len(templates)) + chans = [[] for _ in range(len(templates))] + array_dict_tuple = _get_array_dicts(templates, stream) + stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple + cccsums = np.zeros([len(templates), + len(stream[0]) - len(templates[0][0]) + 1]) + for seed_id in seed_ids: + tr_cc, tr_chans = func(template_dict[seed_id], + stream_dict[seed_id], + pad_dict[seed_id]) + cccsums = np.sum([cccsums, tr_cc], axis=0) + no_chans += tr_chans.astype(np.int) + for chan, state in zip(chans, tr_chans): + if state: + chan.append((seed_id.split('.')[1], + seed_id.split('.')[-1].split('_')[0])) + return cccsums, no_chans, chans + + return stream_xcorr + + +def register_array_xcorr(name, func=None, is_default=False): """ - Compute the normalized cross-correlation of multiple templates with data. + Decorator for registering correlation functions. + + Each function must have the same interface as numpy_normxcorr, which is + *f(templates, stream, pads, *args, **kwargs)* any number of specific kwargs + can be used. + + Register_normxcorr can be used as a decorator (with or without arguments) + or as a callable. + + :param name: The name of the function for quick access, or the callable + that will be wrapped when used as a decorator. 
+    :type name: str, callable
+    :param func: The function to register
+    :type func: callable, optional
+    :param is_default: True if this function should be marked as default
+        normxcorr
+    :type is_default: bool
+
+    :return: callable
+    """
+    valid_methods = set(list(XCOR_ARRAY_METHODS) + list(XCORR_STREAM_METHODS))
+    cache = {}
+
+    def register(register_str):
+        """
+        Register a function as an implementation.
+
+        :param register_str: The registration designation
+        :type register_str: str
+        """
+        if register_str not in valid_methods:
+            msg = 'register_str must be in %s' % valid_methods
+            raise ValueError(msg)
+
+        def _register(func):
+            cache[register_str] = func
+            setattr(cache['func'], register_str, func)
+            return func
+
+        return _register
+
+    def wrapper(func, func_name=None):
+        # register the function in the XCOR_FUNCS cache
+        fname = func_name or (name.__name__ if callable(name) else str(name))
+        XCOR_FUNCS[fname] = func
+        if is_default:  # set function as default
+            XCOR_FUNCS['default'] = func
+        # attach some attrs, this is a bit of a hack to avoid pickle problems
+        func.register = register
+        cache['func'] = func
+        func.multithread = _general_multithread(func)
+        func.multiprocess = _general_multiprocess(func)
+        func.concurrent = _general_multithread(func)
+        func.stream_xcorr = _general_serial(func)
+        func.array_xcorr = func
+        func.registered = True
+        return func
+
+    # used as a decorator
+    if callable(name):
+        return wrapper(name)
+
+    # used as a normal function (called and passed a function)
+    if callable(func):
+        return wrapper(func, func_name=name)
+
+    # called, then used as a decorator
+    return wrapper
+
+
+# ------------------ array_xcorr fetching functions
+
+
+def _get_registerd_func(name_or_func):
+    """ get an xcorr function from a str or callable. """
+    # get the function or register callable
+    if callable(name_or_func):
+        func = register_array_xcorr(name_or_func)
+    else:
+        func = XCOR_FUNCS[name_or_func or 'default']
+    assert callable(func), 'func is not callable'
+    # ensure func has the added methods
+    if not hasattr(func, 'registered'):
+        func = register_array_xcorr(func)
+    return func
+
+
+def get_array_xcorr(name_or_func=None):
+    """
+    Get a normalized cross correlation function that takes arrays as inputs.
+
+    See :func:`eqcorrscan.utils.correlate.array_normxcorr` for expected
+    function signature.
+
+    :param name_or_func: Either a name of a registered xcorr function or a
+        callable that implements the standard array_normxcorr signature.
+    :type name_or_func: str or callable
+
+    :return: callable with the array_normxcorr interface
+
+    see also :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
+    """
+    func = _get_registerd_func(name_or_func)
+    return func.array_xcorr
+
+
+# ----------------------- registered array_xcorr functions
+
+
+@register_array_xcorr('numpy')
+def numpy_normxcorr(templates, stream, pads, *args, **kwargs):
+    """
+    Compute the normalized cross-correlation using numpy and bottleneck.
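+
+    .. rubric:: Example (a sketch with random data; shapes are illustrative)
+
+    >>> import numpy as np
+    >>> templates = np.random.randn(5, 100)
+    >>> stream = np.random.randn(10000)
+    >>> ccc, used_chans = numpy_normxcorr(templates, stream, pads=[0] * 5)
+    >>> ccc.shape
+    (5, 9901)
+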
     :param templates: 2D Array of templates
     :type templates: np.ndarray
@@ -64,6 +338,8 @@ def numpy_normxcorr(templates, stream, pads):
         stream, template_length)[template_length - 1:]
     stream_std_array = bottleneck.move_std(
         stream, template_length)[template_length - 1:]
+    # because stream_std_array is in the denominator of res, NaN all 0s
+    stream_std_array[stream_std_array == 0] = np.nan
     # Normalize and flip the templates
     norm = ((templates - templates.mean(axis=-1, keepdims=True)) / (
         templates.std(axis=-1, keepdims=True) * template_length))
@@ -75,134 +351,15 @@ def numpy_normxcorr(templates, stream, pads):
     res = ((_centered(res, stream_length - template_length + 1)) -
            norm_sum * stream_mean_array) / stream_std_array
     res[np.isnan(res)] = 0.0
-    for i in range(len(pads)):
-        res[i] = np.append(res[i], np.zeros(pads[i]))[pads[i]:]
+    for i, pad in enumerate(pads):
+        res[i] = np.append(res[i], np.zeros(pad))[pad:]
     return res.astype(np.float32), used_chans
 
 
-def multichannel_normxcorr(templates, stream, cores=1, time_domain=False,
-                           openmp=False):
-    """
-    Cross-correlate multiple channels either in parallel or not
-
-    :type templates: list
-    :param templates:
-        A list of templates, where each one should be an obspy.Stream object
-        containing multiple traces of seismic data and the relevant header
-        information.
-    :type stream: obspy.core.stream.Stream
-    :param stream:
-        A single Stream object to be correlated with the templates.
-    :type cores: int
-    :param cores:
-        Number of processes to use, if set to None, no Python multiprocessing
-        will be done.
-    :type cores: int
-    :param cores: Number of cores to loop over
-    :type time_domain: bool
-    :param time_domain:
-        Whether to compute in the time-domain using the compiled openMP
-        parallel cross-correlation routine.
-    :type openmp: bool
-    :param openmp: Whether to use the openmp, compiled threaded loop or not.
-
-    :returns:
-        New list of :class:`numpy.ndarray` objects.  These will contain
-        the correlation sums for each template for this day of data.
-    :rtype: list
-    :returns:
-        list of ints as number of channels used for each cross-correlation.
-    :rtype: list
-    :returns:
-        list of list of tuples of station, channel for all cross-correlations.
-    :rtype: list
-
-    .. Note::
-        Each template must contain the same channels as every other template,
-        the stream must also contain the same channels (note that if there
-        are duplicate channels in the template you do not need duplicate
-        channels in the stream).
- """ - no_chans = np.zeros(len(templates)) - chans = [[] for _i in range(len(templates))] - # Do some reshaping - stream.sort(['network', 'station', 'location', 'channel']) - t_starts = [] - for template in templates: - template.sort(['network', 'station', 'location', 'channel']) - t_starts.append(min([tr.stats.starttime for tr in template])) - seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])] - template_array = {} - stream_array = {} - pad_array = {} - for i, seed_id in enumerate(seed_ids): - t_ar = np.array([template[i].data - for template in templates]).astype(np.float32) - template_array.update({seed_id: t_ar}) - stream_array.update( - {seed_id: stream.select( - id=seed_id.split('_')[0])[0].data.astype(np.float32)}) - pad_list = [ - int(round(template[i].stats.sampling_rate * - (template[i].stats.starttime - t_starts[j]))) - for j, template in zip(range(len(templates)), templates)] - pad_array.update({seed_id: pad_list}) - if cores is None and not openmp: - cccsums = np.zeros([len(templates), - len(stream[0]) - len(templates[0][0]) + 1]) - for seed_id in seed_ids: - if time_domain: - tr_xcorrs, tr_chans = time_multi_normxcorr( - templates=template_array[seed_id], - stream=stream_array[seed_id], pads=pad_array[seed_id]) - else: - tr_xcorrs, tr_chans = fftw_normxcorr( - templates=template_array[seed_id], - stream=stream_array[seed_id], pads=pad_array[seed_id], - threaded=True) - cccsums = np.sum([cccsums, tr_xcorrs], axis=0) - no_chans += tr_chans.astype(np.int) - for chan, state in zip(chans, tr_chans): - if state: - chan.append((seed_id.split('.')[1], - seed_id.split('.')[-1].split('_')[0])) - elif not openmp: - pool = Pool(processes=cores) - if time_domain: - results = [pool.apply_async(time_multi_normxcorr, ( - template_array[seed_id], stream_array[seed_id], - pad_array[seed_id])) for seed_id in seed_ids] - else: - results = [pool.apply_async(fftw_normxcorr, ( - template_array[seed_id], stream_array[seed_id], - pad_array[seed_id], False)) for seed_id in seed_ids] - pool.close() - results = [p.get() for p in results] - xcorrs = [p[0] for p in results] - tr_chans = np.array([p[1] for p in results]) - pool.join() - cccsums = np.sum(xcorrs, axis=0) - no_chans = np.sum(tr_chans.astype(np.int), axis=0) - for seed_id, tr_chan in zip(seed_ids, tr_chans): - for chan, state in zip(chans, tr_chan): - if state: - chan.append((seed_id.split('.')[1], - seed_id.split('.')[-1].split('_')[0])) - else: - xcorrs, tr_chans = fftw_multi_normxcorr( - template_array=template_array, stream_array=stream_array, - pad_array=pad_array, seed_ids=seed_ids) - cccsums = np.sum(xcorrs, axis=0) - no_chans = np.sum(np.array(tr_chans).astype(np.int), axis=0) - for seed_id, tr_chan in zip(seed_ids, tr_chans): - for chan, state in zip(chans, tr_chan): - if state: - chan.append((seed_id.split('.')[1], - seed_id.split('.')[-1].split('_')[0])) - return cccsums, no_chans, chans - - -def time_multi_normxcorr(templates, stream, pads): +@register_array_xcorr('time_domain') +def time_multi_normxcorr(templates, stream, pads, threaded=False, *args, + **kwargs): """ Compute cross-correlations in the time-domain using C routine. 
@@ -212,6 +369,8 @@ def time_multi_normxcorr(templates, stream, pads):
     :type stream: np.ndarray
     :param pads: List of ints of pad lengths in the same order as templates
     :type pads: list
+    :param threaded: Whether to use the threaded routine or not
+    :type threaded: bool
 
     :return: np.ndarray of cross-correlations
     :return: np.ndarray channels used
@@ -220,7 +379,7 @@
 
     utilslib = _load_cdll('libutils')
 
-    utilslib.multi_normxcorr_time.argtypes = [
+    argtypes = [
         np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                                flags=native_str('C_CONTIGUOUS')),
         ctypes.c_int, ctypes.c_int,
@@ -229,8 +388,14 @@
         ctypes.c_int,
         np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                                flags=native_str('C_CONTIGUOUS'))]
-    utilslib.multi_normxcorr_time.restype = ctypes.c_int
-
+    restype = ctypes.c_int
+    if threaded:
+        func = utilslib.multi_normxcorr_time_threaded
+        argtypes.append(ctypes.c_int)
+    else:
+        func = utilslib.multi_normxcorr_time
+    func.argtypes = argtypes
+    func.restype = restype
     # Need to de-mean everything
     templates_means = templates.mean(axis=1).astype(np.float32)[:, np.newaxis]
     stream_mean = stream.mean().astype(np.float32)
@@ -242,9 +407,11 @@
     ccc = np.ascontiguousarray(
         np.empty((image_len - template_len + 1) * n_templates), np.float32)
     t_array = np.ascontiguousarray(templates.flatten(), np.float32)
-    utilslib.multi_normxcorr_time(
-        t_array, template_len, n_templates,
-        np.ascontiguousarray(stream, np.float32), image_len, ccc)
+    time_args = [t_array, template_len, n_templates,
+                 np.ascontiguousarray(stream, np.float32), image_len, ccc]
+    if threaded:
+        time_args.append(kwargs.get('cores', cpu_count()))
+    func(*time_args)
     ccc[np.isnan(ccc)] = 0.0
     ccc = ccc.reshape((n_templates, image_len - template_len + 1))
     for i in range(len(pads)):
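+# Editor's usage sketch (not part of the patch; toy shapes): the registered
+# time-domain correlator can be fetched by name and called on plain arrays,
+# with ``threaded=True`` selecting the openMP C routine configured above.
+#
+# >>> import numpy as np
+# >>> from eqcorrscan.utils.correlate import get_array_xcorr
+# >>> xcorr = get_array_xcorr('time_domain')
+# >>> templates = np.random.randn(2, 50).astype(np.float32)
+# >>> stream = np.random.randn(2000).astype(np.float32)
+# >>> ccc, used_chans = xcorr(templates, stream, pads=[0, 0], threaded=True)
+# >>> ccc.shape
+# (2, 1951)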
@@ -254,7 +421,8 @@
-def fftw_normxcorr(templates, stream, pads, threaded=True):
+@register_array_xcorr('fftw', is_default=True)
+def fftw_normxcorr(templates, stream, pads, threaded=False, *args, **kwargs):
     """
     Normalised cross-correlation using the fftw library.
@@ -291,54 +459,144 @@ def fftw_normxcorr(templates, stream, pads, threaded=True):
         np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                                flags=native_str('C_CONTIGUOUS')),
         ctypes.c_int,
-        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
+        np.ctypeslib.ndpointer(dtype=np.float32,
                                flags=native_str('C_CONTIGUOUS')),
-        ctypes.c_int]
+        ctypes.c_int,
+        np.ctypeslib.ndpointer(dtype=np.intc,
+                               flags=native_str('C_CONTIGUOUS')),
+        np.ctypeslib.ndpointer(dtype=np.intc,
+                               flags=native_str('C_CONTIGUOUS'))]
     restype = ctypes.c_int
+
     if threaded:
         func = utilslib.normxcorr_fftw_threaded
     else:
         func = utilslib.normxcorr_fftw
+
     func.argtypes = argtypes
     func.restype = restype
+
     # Generate a template mask
     used_chans = ~np.isnan(templates).any(axis=1)
     template_length = templates.shape[1]
     stream_length = len(stream)
     n_templates = templates.shape[0]
     fftshape = next_fast_len(template_length + stream_length - 1)
+
     # Normalize and flip the templates
     norm = ((templates - templates.mean(axis=-1, keepdims=True)) / (
         templates.std(axis=-1, keepdims=True) * template_length))
     norm = np.nan_to_num(norm)
-    ccc = np.empty((n_templates, stream_length - template_length + 1),
-                   np.float32).flatten(order='C')
+    ccc = np.zeros((n_templates, stream_length - template_length + 1),
+                   np.float32)
+    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
+    pads_np = np.ascontiguousarray(pads, dtype=np.intc)
+
     ret = func(
         np.ascontiguousarray(norm.flatten(order='C'), np.float32),
         template_length, n_templates,
         np.ascontiguousarray(stream, np.float32), stream_length,
-        np.ascontiguousarray(ccc, np.float32), fftshape)
+        np.ascontiguousarray(ccc, np.float32), fftshape,
+        used_chans_np, pads_np)
     if ret != 0:
         print(ret)
         raise MemoryError()
-    ccc = ccc.reshape((n_templates, stream_length - template_length + 1))
-    for i in range(n_templates):
-        if not used_chans[i]:
-            ccc[i] = np.zeros(stream_length - template_length + 1)
-    ccc[np.isnan(ccc)] = 0.0
-    if np.any(np.abs(ccc) > 1.01):
-        print('Normalisation error in C code')
-        print(ccc.max())
-        print(ccc.min())
-        raise MemoryError()
-    ccc[ccc > 1.0] = 1.0
-    ccc[ccc < -1.0] = -1.0
-    for i in range(len(pads)):
-        ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
+    return ccc, used_chans
+
+
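+# Editor's note (a sketch, not part of the patch): because 'fftw' is
+# registered with ``is_default=True``, fetching with no name returns this
+# function, assuming no other default has been registered in the session.
+#
+# >>> from eqcorrscan.utils.correlate import get_array_xcorr
+# >>> get_array_xcorr() is fftw_normxcorr
+# True
+
+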
+# The time-domain routine can be sped up massively on large machines (many
+# threads) using the openMP threaded functions.
+
+@time_multi_normxcorr.register('concurrent')
+def _time_threaded_normxcorr(templates, stream, *args, **kwargs):
+    """
+    Use the threaded time-domain routine for concurrency.
+
+    :type templates: list
+    :param templates:
+        A list of templates, where each one should be an obspy.Stream object
+        containing multiple traces of seismic data and the relevant header
+        information.
+    :type stream: obspy.core.stream.Stream
+    :param stream:
+        A single Stream object to be correlated with the templates.
+
+    :returns:
+        New list of :class:`numpy.ndarray` objects. These will contain
+        the correlation sums for each template for this day of data.
+    :rtype: list
+    :returns:
+        list of ints as number of channels used for each cross-correlation.
+    :rtype: list
+    :returns:
+        list of list of tuples of station, channel for all cross-correlations.
+    :rtype: list
+    """
+    no_chans = np.zeros(len(templates))
+    chans = [[] for _ in range(len(templates))]
+    array_dict_tuple = _get_array_dicts(templates, stream)
+    stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple
+    cccsums = np.zeros([len(templates),
+                        len(stream[0]) - len(templates[0][0]) + 1])
+    for seed_id in seed_ids:
+        tr_cc, tr_chans = time_multi_normxcorr(
+            template_dict[seed_id], stream_dict[seed_id], pad_dict[seed_id],
+            True)
+        cccsums = np.sum([cccsums, tr_cc], axis=0)
+        no_chans += tr_chans.astype(np.int)
+        for chan, state in zip(chans, tr_chans):
+            if state:
+                chan.append((seed_id.split('.')[1],
+                             seed_id.split('.')[-1].split('_')[0]))
+    return cccsums, no_chans, chans
+
+
+# TODO: This should be turned back on when the openMP loop is merged
+# @fftw_normxcorr.register('stream_xcorr')
+@fftw_normxcorr.register('concurrent')
+def _fftw_stream_xcorr(templates, stream, *args, **kwargs):
+    """
+    Apply the fftw normxcorr routine concurrently.
+
+    :type templates: list
+    :param templates:
+        A list of templates, where each one should be an obspy.Stream object
+        containing multiple traces of seismic data and the relevant header
+        information.
+    :type stream: obspy.core.stream.Stream
+    :param stream:
+        A single Stream object to be correlated with the templates.
+
+    :returns:
+        New list of :class:`numpy.ndarray` objects. These will contain
+        the correlation sums for each template for this day of data.
+    :rtype: list
+    :returns:
+        list of ints as number of channels used for each cross-correlation.
+    :rtype: list
+    :returns:
+        list of list of tuples of station, channel for all cross-correlations.
+    :rtype: list
+    """
+    chans = [[] for _i in range(len(templates))]
+    array_dict_tuple = _get_array_dicts(templates, stream)
+    stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple
+    assert set(seed_ids)
+    cccsums, tr_chans = fftw_multi_normxcorr(
+        template_array=template_dict, stream_array=stream_dict,
+        pad_array=pad_dict, seed_ids=seed_ids)
+    no_chans = np.sum(np.array(tr_chans).astype(np.int), axis=0)
+    for seed_id, tr_chan in zip(seed_ids, tr_chans):
+        for chan, state in zip(chans, tr_chan):
+            if state:
+                chan.append((seed_id.split('.')[1],
+                             seed_id.split('.')[-1].split('_')[0]))
+    return cccsums, no_chans, chans
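+
+
+# Editor's note (sketch, not part of the patch): both concurrent
+# implementations above return the same triple,
+#
+#     cccsums, no_chans, chans = _fftw_stream_xcorr(templates, stream)
+#
+# where ``cccsums`` holds one row of channel-summed correlations per
+# template, ``no_chans[i]`` counts the channels that contributed to template
+# ``i``, and ``chans[i]`` lists the matching (station, channel) pairs.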
 
 
 def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids):
     """
     Use a C loop rather than a Python loop - in some cases this will be fast.
@@ -358,15 +616,19 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids):
 
     utilslib = _load_cdll('libutils')
 
     utilslib.multi_normxcorr_fftw.argtypes = [
-        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
+        np.ctypeslib.ndpointer(dtype=np.float32,
                                flags=native_str('C_CONTIGUOUS')),
         ctypes.c_int, ctypes.c_int, ctypes.c_int,
-        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
+        np.ctypeslib.ndpointer(dtype=np.float32,
                                flags=native_str('C_CONTIGUOUS')),
         ctypes.c_int,
-        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
+        np.ctypeslib.ndpointer(dtype=np.float32,
+                               flags=native_str('C_CONTIGUOUS')),
+        ctypes.c_int,
+        np.ctypeslib.ndpointer(dtype=np.intc,
                                flags=native_str('C_CONTIGUOUS')),
-        ctypes.c_int]
+        np.ctypeslib.ndpointer(dtype=np.intc,
+                               flags=native_str('C_CONTIGUOUS'))]
     utilslib.multi_normxcorr_fftw.restype = ctypes.c_int
     '''
     Arguments are:
@@ -378,7 +640,10 @@
         image length
         cross-correlations (stacked as per image)
         fft-length
+        used channels (stacked as per templates)
+        pad array (stacked as per templates)
     '''
+    # pre-processing
     used_chans = []
     template_len = template_array[seed_ids[0]].shape[1]
     for seed_id in seed_ids:
@@ -393,38 +658,104 @@
     n_templates = template_array[seed_ids[0]].shape[0]
     image_len = stream_array[seed_ids[0]].shape[0]
     fft_len = next_fast_len(template_len + image_len - 1)
-    template_array = np.array(list(template_array.values())).flatten(order='C')
-    stream_array = np.array(list(stream_array.values())).flatten(order='C')
-    cccs = np.empty((n_channels, n_templates, image_len - template_len + 1),
-                    np.float32).flatten(order='C')
+    template_array = np.ascontiguousarray([template_array[x]
+                                           for x in seed_ids],
+                                          dtype=np.float32)
+    stream_array = np.ascontiguousarray([stream_array[x] for x in seed_ids],
+                                        dtype=np.float32)
+    cccs = np.zeros((n_templates, image_len - template_len + 1),
+                    np.float32)
+    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
+    pad_array_np = np.ascontiguousarray([pad_array[seed_id]
+                                         for seed_id in seed_ids],
+                                        dtype=np.intc)
+
+    # call C function
     ret = utilslib.multi_normxcorr_fftw(
         template_array, n_templates, template_len, n_channels, stream_array,
-        image_len, cccs, fft_len)
-    if ret != 0:
+        image_len, cccs, fft_len, used_chans_np, pad_array_np)
+    if ret < 0:
         raise MemoryError()
-    cccs = cccs.reshape((n_channels, n_templates,
-                         image_len - template_len + 1))
-    for j in range(n_channels):
-        for i in range(n_templates):
-            if not used_chans[j][i]:
-                cccs[j][i] = np.zeros(image_len - template_len + 1)
-    cccs[np.isnan(cccs)] = 0.0
-    if np.any(np.abs(cccs) > 1.01):
-        print('Normalisation error in C code')
+    elif ret > 0:
+        print('Error in C code (possible normalisation error)')
         print(cccs.max())
         print(cccs.min())
         raise MemoryError()
-    cccs[cccs > 1.0] = 1.0
-    cccs[cccs < -1.0] = -1.0
-    for j, seed_id in enumerate(seed_ids):
-        for i in range(len(pad_array[seed_id])):
-            cccs[j][i] = np.append(
-                cccs[j][i],
-                np.zeros(pad_array[seed_id][i]))[pad_array[seed_id][i]:]
-    cccs = cccs.reshape(n_channels, n_templates, image_len - template_len + 1)
     return cccs, used_chans
 
 
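+# The stacking idiom above, in isolation (editor's sketch with a made-up
+# seed id): dict values are packed into one contiguous float32 block in
+# ``seed_ids`` order, which is what the C routine indexes into.
+#
+# >>> import numpy as np
+# >>> template_array = {'NZ.FOZ.10.HHZ_0': np.ones((2, 5))}
+# >>> seed_ids = ['NZ.FOZ.10.HHZ_0']
+# >>> block = np.ascontiguousarray(
+# ...     [template_array[x] for x in seed_ids], dtype=np.float32)
+# >>> block.shape, block.flags['C_CONTIGUOUS']
+# ((1, 2, 5), True)
+
+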
+# ------------------------------- stream_xcorr functions
+
+
+def get_stream_xcorr(name_or_func=None, concurrency=None):
+    """
+    Return a function for performing normalized cross-correlation on lists
+    of streams.
+
+    :param name_or_func:
+        Either a name of a registered function or a callable that implements
+        the standard array_normxcorr signature.
+    :param concurrency:
+        Optional concurrency strategy, options are below.
+
+    :return: A callable with the interface of stream_normxcorr
+
+    :Concurrency options:
+        - multithread - use a threadpool for concurrency;
+        - multiprocess - use a process pool for concurrency;
+        - concurrent - use a customized concurrency strategy for the
+          function, if not defined threading will be used.
+    """
+    func = _get_registered_func(name_or_func)
+
+    concur = concurrency or 'stream_xcorr'
+    if not hasattr(func, concur):
+        msg = '%s does not support concurrency %s' % (func.__name__, concur)
+        raise ValueError(msg)
+    return getattr(func, concur)
+
+
+# --------------------------- stream prep functions
+
+
+def _get_array_dicts(templates, stream, copy_streams=True):
+    """ Prepare templates and stream, return dicts. """
+    # init empty structures for data storage
+    template_dict = {}
+    stream_dict = {}
+    pad_dict = {}
+    t_starts = []
+
+    # do some reshaping
+    stream.sort(['network', 'station', 'location', 'channel'])
+    for template in templates:
+        template.sort(['network', 'station', 'location', 'channel'])
+        t_starts.append(min([tr.stats.starttime for tr in template]))
+    # get seed ids, make sure these are collected on sorted streams
+    seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])]
+    # pull common channels out of streams and templates and put in dicts
+    for i, seed_id in enumerate(seed_ids):
+        temps_with_seed = [template[i].data for template in templates]
+        t_ar = np.array(temps_with_seed).astype(np.float32)
+        template_dict.update({seed_id: t_ar})
+        stream_dict.update(
+            {seed_id: stream.select(
+                id=seed_id.split('_')[0])[0].data.astype(np.float32)})
+        pad_list = [
+            int(round(template[i].stats.sampling_rate *
+                      (template[i].stats.starttime - t_starts[j])))
+            for j, template in zip(range(len(templates)), templates)]
+        pad_dict.update({seed_id: pad_list})
+
+    return stream_dict, template_dict, pad_dict, seed_ids
+
+
+# a dict of built-in xcorr functions, used to distinguish from user-defined
+XCORR_FUNCS_ORIGINAL = copy.copy(XCOR_FUNCS)
+
+
 if __name__ == '__main__':
     import doctest
+    doctest.testmod()
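+
+# Editor's usage sketch (not part of the patch; ``templates`` and ``st`` are
+# hypothetical obspy objects prepared with matching channels):
+#
+# >>> from eqcorrscan.utils.correlate import get_stream_xcorr
+# >>> func = get_stream_xcorr('fftw', concurrency='multithread')
+# >>> # cccsums, no_chans, chans = func(templates, st)
+#
+# An unknown strategy raises the ValueError above, e.g.
+# get_stream_xcorr('fftw', concurrency='bogus').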
diff --git a/eqcorrscan/utils/debug_log.py b/eqcorrscan/utils/debug_log.py
new file mode 100644
index 000000000..cd2b9fb05
--- /dev/null
+++ b/eqcorrscan/utils/debug_log.py
@@ -0,0 +1,40 @@
+"""
+EQcorrscan's simple logging.
+
+:copyright:
+    EQcorrscan developers.
+
+:license:
+    GNU Lesser General Public License, Version 3
+    (https://www.gnu.org/copyleft/lesser.html)
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+
+def debug_print(string, debug_level, print_level):
+    """
+    Print the string if the print_level exceeds the debug_level.
+
+    :type string: str
+    :param string: String to print
+    :type debug_level: int
+    :param debug_level: Output level for the function
+    :type print_level: int
+    :param print_level: Print-level for the statement
+
+    .. rubric:: Example
+
+    >>> debug_print("Albert", 2, 0)
+    >>> debug_print("Norman", 0, 2)
+    Norman
+    """
+    if print_level > debug_level:
+        print(string)
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/eqcorrscan/utils/findpeaks.py b/eqcorrscan/utils/findpeaks.py
index 6189a4feb..b69df601e 100644
--- a/eqcorrscan/utils/findpeaks.py
+++ b/eqcorrscan/utils/findpeaks.py
@@ -204,89 +204,6 @@ def decluster(peaks, trig_int, return_ind=False, debug=0):
     return peaks_out
 
 
-def find_peaks_dep(arr, thresh, trig_int, debug=0, starttime=False,
-                   samp_rate=1.0):
-    """
-    Determine peaks in an array of data above a certain threshold: depreciated.
-
-    Depreciated peak-finding routine, very slow, but accurate. If all else \
-    fails this one should work.
-
-    :type arr: numpy.ndarray
-    :param arr: 1-D numpy array is required
-    :type thresh: float
-    :param thresh: The threshold below which will be considered noise and \
-        peaks will not be found in.
-    :type trig_int: int
-    :param trig_int: The minimum difference in samples between triggers,\
-        if multiple peaks within this window this code will find the highest.
-    :type starttime: obspy.core.utcdatetime.UTCDateTime
-    :param starttime: Starttime for plotting, only used if debug > 2.
-    :type samp_rate: float
-    :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2.
-
-    :return: peaks: Lists of tuples of peak values and locations.
-    :rtype: list
-
-    >>> import numpy as np
-    >>> arr = np.random.randn(100)
-    >>> threshold = 10
-    >>> arr[40] = 20
-    >>> arr[60] = 100
-    >>> find_peaks_dep(arr, threshold, 3)
-    [(20.0, 40), (100.0, 60)]
-    """
-    if not starttime:
-        starttime = UTCDateTime(0)
-    # Perform some checks
-    if trig_int < 3:
-        msg = 'Trigger interval must be greater than 2 samples to find maxima'
-        raise IOError(msg)
-    # from joblib import Parallel, delayed
-    # Will find peaks in the absolute then transfer these to the true values
-    sig = np.abs(arr) - thresh
-    true_peaks = []
-    for i in range(int(trig_int), int(len(sig) - trig_int), int(trig_int)):
-        window = sig[i - trig_int: i + trig_int]
-        # Define a moving window containing data from +/- the trigger iterval
-        peaks = []
-        locs = []
-        for j in range(1, len(window) - 1):
-            # Find all turning points within the window
-            if window[j] > 0.0 and window[j] > window[j + 1] and\
-               window[j] > window[j - 1]:
-                peaks.append(window[j])
-                locs.append(i - trig_int + j)
-        # Find maximum peak in window
-        if peaks:
-            true_peaks.append((np.max(np.array(peaks)),
-                               locs[np.argmax(np.array(peaks))]))
-    # Get unique values
-    peaks = sorted(list(set(true_peaks)), key=lambda loc: loc[1])
-    # Find highest peak in peaks within trig_int of each other
-    for i in range(1, len(peaks) - 1):
-        if peaks[i + 1][1] - peaks[i][1] < trig_int:
-            if peaks[i][0] < peaks[i + 1][0]:
-                peaks[i] = peaks[i + 1]
-            else:
-                peaks[i + 1] = peaks[i]
-        elif peaks[i][1] - peaks[i - 1][1] < trig_int:
-            if peaks[i][0] < peaks[i - 1][0]:
-                peaks[i] = peaks[i - 1]
-            else:
-                peaks[i - 1] = peaks[i]
-    peaks = sorted(list(set(peaks)), key=lambda loc: loc[1])
-    if debug >= 3:
-        from eqcorrscan.utils import plotting
-        _fname = ''.join(['peaks_',
-                          starttime.datetime.strftime('%Y-%m-%d'),
-                          '.pdf'])
-        plotting.peaks_plot(arr, starttime, samp_rate, True, peaks,
-                            _fname)
-    peaks = [(peak[0] + thresh, peak[1]) for peak in peaks]
-    return peaks
-
-
 def coin_trig(peaks, stachans, samp_rate, moveout, min_trig, trig_int):
     """
     Find network coincidence triggers within peaks of detection statistics.
diff --git a/eqcorrscan/utils/mag_calc.py b/eqcorrscan/utils/mag_calc.py
index f4357d11a..25d289799 100644
--- a/eqcorrscan/utils/mag_calc.py
+++ b/eqcorrscan/utils/mag_calc.py
@@ -990,11 +990,11 @@ def svd_moments(u, s, v, stachans, event_list, n_svs=2):
     >>> stream_files = glob.glob(os.path.join(testing_path, '*'))
     >>> stream_list = [read(stream_file) for stream_file in stream_files]
     >>> event_list = []
+    >>> keep_list = [('WHAT2', 'SH1'), ('WV04', 'SHZ'), ('GCSZ', 'EHZ')]
    >>> for i, stream in enumerate(stream_list):
     ...     st_list = []
     ...     for tr in stream:
-    ...         if (tr.stats.station, tr.stats.channel) not in
-    ...         [('WHAT2', 'SH1'), ('WV04', 'SHZ'), ('GCSZ', 'EHZ')]:
+    ...         if (tr.stats.station, tr.stats.channel) not in keep_list:
     ...             stream.remove(tr)
     ...             continue
     ...         tr.detrend('simple')
@@ -1106,7 +1106,7 @@ def svd_moments(u, s, v, stachans, event_list, n_svs=2):
     K_width = len(K[0])
     # Add an extra row to K, so average moment = 1
     K.append(np.ones(K_width) * (1. / K_width))
-    print("\nCreated Kernel matrix: ")
+    print("Created Kernel matrix: ")
     del row
     print('\n'.join([''.join([str(round(float(item), 3)).ljust(6)
                     for item in row]) for row in K]))
diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index 4d7dc3bca..b9fd2542b 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -171,6 +171,8 @@ def shortproc(st, lowcut, highcut, filt_order, samp_rate, debug=0,
     if parallel:
         if not num_cores:
             num_cores = cpu_count()
+        if num_cores > len(st):
+            num_cores = len(st)
         pool = Pool(processes=num_cores)
         results = [pool.apply_async(process, (tr,), {
             'lowcut': lowcut, 'highcut': highcut, 'filt_order': filt_order,
@@ -183,10 +185,10 @@ def shortproc(st, lowcut, highcut, filt_order, samp_rate, debug=0,
         st = Stream(stream_list)
     else:
         for tr in st:
-            process(tr=tr, lowcut=lowcut, highcut=highcut,
-                    filt_order=filt_order, samp_rate=samp_rate, debug=debug,
-                    starttime=False, clip=False,
-                    seisan_chan_names=seisan_chan_names)
+            process(
+                tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order,
+                samp_rate=samp_rate, debug=debug, starttime=False, clip=False,
+                seisan_chan_names=seisan_chan_names)
     if tracein:
         st.merge()
         return st[0]
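+# Editor's note (sketch, not part of the patch): the two lines added above
+# cap the pool at the stream size, i.e. the effective worker count is
+# min(num_cores, len(st)), so a one-trace stream never spawns a full
+# cpu_count() pool.
+#
+# >>> from multiprocessing import cpu_count
+# >>> num_cores = min(cpu_count(), len(st))  # ``st`` is hypothetical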
@@ -244,7 +246,7 @@ def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, debug=0,
         warn any-time it has to pad data - if you see strange artifacts in your
         detections, check whether the data have gaps.
 
-    .. rubric:: Example, bandpass
+    .. rubric:: Example
 
     >>> import obspy
     >>> if int(obspy.__version__.split('.')[0]) >= 1:
@@ -253,60 +255,30 @@ def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, debug=0,
     ...     from obspy.fdsn import Client
     >>> from obspy import UTCDateTime
     >>> from eqcorrscan.utils.pre_processing import dayproc
-    >>> client = Client('GEONET')
+    >>> client = Client('NCEDC')
     >>> t1 = UTCDateTime(2012, 3, 26)
     >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
-    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
+    >>> bulk_info = [('BP', 'JCNB', '40', 'SP1', t1, t2)]
     >>> st = client.get_waveforms_bulk(bulk_info)
+    >>> st_keep = st.copy()  # Copy the stream for later examples
+    >>> # Example of bandpass filtering
     >>> st = dayproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
     >>> print(st[0])
-    NZ.FOZ.10.HHE | 2012-03-25T23:59:59.998393Z - 2012-03-26T23:59:59.\
-948393Z | 20.0 Hz, 1728000 samples
-
-
-    .. rubric:: Example, low-pass
-
-    >>> import obspy
-    >>> if int(obspy.__version__.split('.')[0]) >= 1:
-    ...     from obspy.clients.fdsn import Client
-    ... else:
-    ...     from obspy.fdsn import Client
-    >>> from obspy import UTCDateTime
-    >>> from eqcorrscan.utils.pre_processing import dayproc
-    >>> client = Client('GEONET')
-    >>> t1 = UTCDateTime(2012, 3, 26)
-    >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
-    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
-    >>> st = client.get_waveforms_bulk(bulk_info)
+    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
+950000Z | 20.0 Hz, 1728000 samples
+    >>> # Example of lowpass filtering
     >>> st = dayproc(st=st, lowcut=None, highcut=9, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
     >>> print(st[0])
-    NZ.FOZ.10.HHE | 2012-03-25T23:59:59.998393Z - 2012-03-26T23:59:59.\
-948393Z | 20.0 Hz, 1728000 samples
-
-    .. rubric:: Example, high-pass
-
-    >>> import obspy
-    >>> if int(obspy.__version__.split('.')[0]) >= 1:
-    ...     from obspy.clients.fdsn import Client
-    ... else:
-    ...     from obspy.fdsn import Client
-    >>> from obspy import UTCDateTime
-    >>> from eqcorrscan.utils.pre_processing import dayproc
-    >>> client = Client('GEONET')
-    >>> t1 = UTCDateTime(2012, 3, 26)
-    >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
-    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
-    >>> st = client.get_waveforms_bulk(bulk_info)
+    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
+950000Z | 20.0 Hz, 1728000 samples
+    >>> # Example of highpass filtering
     >>> st = dayproc(st=st, lowcut=2, highcut=None, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
     >>> print(st[0])
-    NZ.FOZ.10.HHE | 2012-03-25T23:59:59.998393Z - 2012-03-26T23:59:59.\
-948393Z | 20.0 Hz, 1728000 samples
+    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
+950000Z | 20.0 Hz, 1728000 samples
     """
     # Add sanity check for filter
     if isinstance(st, Trace):
@@ -339,6 +311,8 @@ def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, debug=0,
     if parallel:
         if not num_cores:
             num_cores = cpu_count()
+        if num_cores > len(st):
+            num_cores = len(st)
         pool = Pool(processes=num_cores)
         results = [pool.apply_async(process, (tr,), {
             'lowcut': lowcut, 'highcut': highcut, 'filt_order': filt_order,
@@ -352,11 +326,11 @@ def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, debug=0,
         st = Stream(stream_list)
     else:
         for tr in st:
-            process(tr=tr, lowcut=lowcut, highcut=highcut,
-                    filt_order=filt_order, samp_rate=samp_rate, debug=debug,
-                    starttime=starttime, clip=True, length=86400,
-                    ignore_length=ignore_length,
-                    seisan_chan_names=seisan_chan_names)
+            process(
+                tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order,
+                samp_rate=samp_rate, debug=debug, starttime=starttime,
+                clip=True, length=86400, ignore_length=ignore_length,
+                seisan_chan_names=seisan_chan_names)
     if tracein:
         st.merge()
         return st[0]
diff --git a/eqcorrscan/utils/timer.py b/eqcorrscan/utils/timer.py
index 9f50a51d0..3d2de137a 100644
--- a/eqcorrscan/utils/timer.py
+++ b/eqcorrscan/utils/timer.py
@@ -11,7 +11,20 @@
 
 
 class Timer(object):
-    """Simple wrapper for timing objects, used for debug."""
+    """
+    Simple wrapper for timing objects, used for debug.
+
+    .. rubric:: Example
+
+    >>> from time import sleep
+    >>> with Timer() as t:
+    ...     sleep(0.1)
+    >>> print("%.1f" % t.secs)  # doctest:+SKIP
+    0.1
+    >>> with Timer(verbose=True) as t:
+    ...     sleep(0.1)  # doctest:+ELLIPSIS
+    elapsed time: ...
+ """ def __init__(self, verbose=False): """Create timer object.""" @@ -28,7 +41,7 @@ def __exit__(self, *args): self.secs = self.end - self.start self.msecs = self.secs * 1000 # millisecs if self.verbose: - print('elapsed time: %f ms' % self.msecs) + print('elapsed time: %.0f ms' % self.msecs) if __name__ == "__main__": diff --git a/pytest.ini b/pytest.ini index 8c3f8d4a7..63110dbd1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] norecursedirs=eqcorrscan/doc .* eqcorrscan/scripts build pyasdf eqcorrscan/lib eqcorrscan/utils/src -addopts = --cov=eqcorrscan --cov-config .coveragerc --ignore=setup.py --doctest-modules -n 2 +addopts = --cov=eqcorrscan --cov-config .coveragerc --ignore=setup.py --doctest-modules -p no:warnings diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..140df563f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +numpy>=1.12 +matplotlib>=1.3.0 +scipy>=0.18 +LatLon +bottleneck +obspy>=1.0.3 +h5py +pytest>=2.0.0 +pytest-cov +pytest-pep8 +pytest-xdist +pytest-rerunfailures +codecov \ No newline at end of file diff --git a/setup.py b/setup.py index c771ab0c2..34e12b1a2 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ import os import sys +import shutil import glob import eqcorrscan @@ -55,7 +56,7 @@ def get_package_data(): if get_build_platform() in ('win32', 'win-amd64'): package_data['eqcorrscan.lib'] = [ 'libfftw3-3.dll', 'libfftw3f-3.dll', 'libfftw3l-3.dll'] - + return package_data @@ -132,7 +133,8 @@ def get_extensions(): 'include_dirs': get_include_dirs(), 'library_dirs': get_library_dirs()} - sources = [os.path.join('eqcorrscan', 'lib', 'multi_corr.c')] + sources = [os.path.join('eqcorrscan', 'lib', 'multi_corr.c'), + os.path.join('eqcorrscan', 'lib', 'time_corr.c')] exp_symbols = export_symbols("eqcorrscan/lib/libutils.def") if get_build_platform() not in ('win32', 'win-amd64'): @@ -238,7 +240,8 @@ def setup_package(): if not READ_THE_DOCS: install_requires = ['matplotlib>=1.3.0', 'scipy>=0.18', 'LatLon', - 'bottleneck', 'obspy>=1.0.3', 'numpy>=1.12'] + 'bottleneck', 'obspy>=1.0.3', 'numpy>=1.12', + 'h5py'] else: install_requires = ['matplotlib>=1.3.0', 'LatLon', 'obspy>=1.0.3', 'mock'] @@ -257,7 +260,7 @@ def setup_package(): 'Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering', - 'License :: OSI Approved :: GNU Library or Lesser General Public ' + + 'License :: OSI Approved :: GNU Library or Lesser General Public ' 'License (LGPL)', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.4', @@ -268,8 +271,8 @@ def setup_package(): 'scripts': scriptfiles, 'install_requires': install_requires, 'setup_requires': ['pytest-runner'], - 'tests_require': ['pytest', 'pytest-cov', 'pytest-pep8', - 'pytest-xdist'], + 'tests_require': ['pytest>=2.0.0', 'pytest-cov', 'pytest-pep8', + 'pytest-xdist', 'pytest-rerunfailures'], 'cmdclass': {'build_ext': CustomBuildExt} } @@ -290,6 +293,8 @@ def setup_package(): setup_args['ext_modules'] = get_extensions() setup_args['package_data'] = get_package_data() setup_args['package_dir'] = get_package_dir() + if os.path.isdir("build"): + shutil.rmtree("build") setup(**setup_args) if __name__ == '__main__':