diff --git a/.coveragerc b/.coveragerc
index 59cec5006..9ef2f6226 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,6 +1,7 @@
[run]
branch = True
source = eqcorrscan
+concurrency = multiprocessing,thread
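+# Follow code run in child processes and threads spawned by the detect pipeline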
omit =
eqcorrscan/__init__.py
eqcorrscan/*/__init__.py
diff --git a/.github/test_conda_env.yml b/.github/test_conda_env.yml
index 8ff5c492f..31666f8a4 100644
--- a/.github/test_conda_env.yml
+++ b/.github/test_conda_env.yml
@@ -5,9 +5,9 @@ channels:
dependencies:
- numpy>=1.12
- matplotlib>=1.3.0
- - scipy>=0.18,<1.9.0 # Pinned due to scipy/obspy hanning renaming
+ - scipy
- mock
- - obspy>=1.3.0
+ - obspy>=1.4.0
- h5py
- pyyaml
- bottleneck
@@ -17,8 +17,6 @@ dependencies:
- pytest-pep8
- pytest-xdist
- pytest-rerunfailures
- - pytest-mpl<0.16.0
+ - pytest-mpl
- codecov
- - pip
- - pip:
- - pyfftw
+ - boto3
diff --git a/.github/test_conda_env_macOS.yml b/.github/test_conda_env_macOS.yml
index 3282a48c6..62bc683cd 100644
--- a/.github/test_conda_env_macOS.yml
+++ b/.github/test_conda_env_macOS.yml
@@ -1,23 +1,28 @@
name: eqcorrscan-test
channels:
- conda-forge
- - defaults
dependencies:
- - clangdev>=4
- - openmp>=4
- - libcxx>=4
- - cctools
- - clang
+ # Compiler bits to match conda-forge build env
+ - cctools_osx-64
+ - clang=14
+ - clang-14
- clang_osx-64
+ - llvm-openmp
+ - clangxx=14
- compiler-rt
- - libcxx
- - llvm-openmp>=4.0.1
+ - compiler-rt_osx-64
+ - ld64_osx-64
+ - libclang-cpp14
+ - libcxx=14
+ - libllvm14
+ - llvm-tools
+ # Normal requirements
- numpy>=1.12
- matplotlib>=1.3.0
- - scipy>=0.18,<1.9.0 # Pinned due to scipy/obspy hanning renaming
+ - scipy
- mock
- - obspy>=1.3.0
- - h5py<3.2 # Issue with dep resolution: https://github.com/conda-forge/h5py-feedstock/issues/92
+ - obspy>=1.4.0
+ - h5py>3.3 # Issue with dep resolution: https://github.com/conda-forge/h5py-feedstock/issues/92
- pyyaml
- bottleneck
- fftw
@@ -28,6 +33,4 @@ dependencies:
- pytest-rerunfailures
- pytest-mpl<0.16.0
- codecov
- - pip
- - pip:
- - pyfftw
+ - boto3
diff --git a/.github/workflows/flake8-linter-matcher.json b/.github/workflows/flake8-linter-matcher.json
new file mode 100644
index 000000000..58c5dabca
--- /dev/null
+++ b/.github/workflows/flake8-linter-matcher.json
@@ -0,0 +1,17 @@
+{
+ "problemMatcher": [
+ {
+ "owner": "flake8-linter-error",
+ "severity": "error",
+ "pattern": [
+ {
+ "regexp": "^([^:]+):(\\d+):(\\d+):\\s+([EWCNF]\\d+\\s+.+)$",
+ "file": 1,
+ "line": 2,
+ "column": 3,
+ "message": 4
+ }
+ ]
+ }
+ ]
+}
diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml
new file mode 100644
index 000000000..f2b1f9676
--- /dev/null
+++ b/.github/workflows/flake8.yml
@@ -0,0 +1,27 @@
+name: Flake8 Linter
+on:
+ pull_request:
+jobs:
+ flake8-linter:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v3
+ - name: Install Python 3.10
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ - name: Install Flake8 5.0.4 linter
+ run: pip install flake8==5.0.4 # use this version for --diff option
+ - name: Setup Flake8 output matcher for PR annotations
+ run: echo '::add-matcher::.github/workflows/flake8-linter-matcher.json'
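+        # Illustrative example of a flake8 line this matcher annotates:
+        #   eqcorrscan/core/lag_calc.py:42:80: E501 line too long (86 > 79 characters)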
+ - name: Fetch pull request target branch
+ run: |
+ git remote add upstream https://github.com/eqcorrscan/EQcorrscan.git
+ git fetch upstream $GITHUB_BASE_REF
+ - name: Run Flake8 linter
+ run: git diff upstream/$GITHUB_BASE_REF HEAD | flake8
+ --exclude eqcorrscan/doc
+ --ignore W605,W504,W503
+ --max-line-length 80
+ --diff
diff --git a/.github/workflows/runtest.yml b/.github/workflows/runtest.yml
index 49408e66b..17337e5fa 100644
--- a/.github/workflows/runtest.yml
+++ b/.github/workflows/runtest.yml
@@ -8,12 +8,12 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
- python-version: ['3.7', '3.8', '3.9']
+ python-version: ['3.8', '3.9', '3.10', '3.11']
fail-fast: false
# continue-on-error: true
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Get conda env file
shell: bash -l {0}
@@ -22,31 +22,22 @@ jobs:
cp .github/test_conda_env_macOS.yml .github/test_conda_env.yml
fi
- - name: Cache conda
- uses: actions/cache@v2
- env:
- # Increase this value to reset cache if needed by env file has not changed
- CACHE_NUMBER: 0
- with:
- path: ~/conda_pkgs_dir
- key:
- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
- hashFiles('.github/test_conda_env.yml') }}
-
- name: Setup conda
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v2.1.1
with:
- miniconda-version: 'latest'
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
python-version: ${{ matrix.python-version }}
activate-environment: eqcorrscan-test
- environment-file: .github/test_conda_env.yml
- condarc-file: .github/test_condarc.yml
- use-only-tar-bz2: true # Must be set for caching to work properly
+ use-mamba: true
+
+ - name: Update Env
+ run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml
- name: install eqcorrscan
shell: bash -l {0}
run: |
- pip install -e .
+ pip install -v -e . --no-deps
- name: print package info
shell: bash -l {0}
@@ -65,7 +56,7 @@ jobs:
- name: run main test suite
shell: bash -l {0}
run: |
- py.test -n 2 -m "not serial and not network and not superslow" --cov-report=xml --dist loadscope
+ py.test -n 2 -m "not serial and not network and not superslow and not slow" --cov-report=xml --dist loadscope
- name: run serial test
if: always()
@@ -81,7 +72,7 @@ jobs:
py.test -v -m "slow and not serial and not network" --cov-report=xml --cov-append
- name: upload coverage
- uses: codecov/codecov-action@v1
+ uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
@@ -95,37 +86,28 @@ jobs:
runs-on: "ubuntu-latest"
strategy:
matrix:
- python-version: [3.8]
+ python-version: ['3.10']
fail-fast: false
steps:
- - uses: actions/checkout@v2
-
- - name: Cache conda
- uses: actions/cache@v2
- env:
- # Increase this value to reset cache if needed by env file has not changed
- CACHE_NUMBER: 0
- with:
- path: ~/conda_pkgs_dir
- key:
- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
- hashFiles('.github/test_conda_env.yml') }}
+ - uses: actions/checkout@v3
- name: Setup conda
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v2.1.1
with:
- miniconda-version: 'latest'
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
python-version: ${{ matrix.python-version }}
activate-environment: eqcorrscan-test
- environment-file: .github/test_conda_env.yml
- condarc-file: .github/test_condarc.yml
- use-only-tar-bz2: true # Must be set for caching to work properly
+ use-mamba: true
+
+ - name: Update Env
+ run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml
- name: install eqcorrscan
shell: bash -l {0}
run: |
- pip install -e .
+ pip install -e . --no-deps
- name: print package info
shell: bash -l {0}
@@ -136,7 +118,7 @@ jobs:
- name: run network tests
shell: bash -l {0}
run: |
- py.test -n 2 -m "network" --cov-report=xml
+ py.test -n 2 -m "network and not superslow" --cov-report=xml
- name: run tutorials
if: always()
shell: bash -l {0}
@@ -149,7 +131,7 @@ jobs:
py.test -m "superslow" -s eqcorrscan/tests/tutorials_test.py eqcorrscan/tests/subspace_test.py --cov-report=xml --cov-append
- name: upload coverage
- uses: codecov/codecov-action@v1
+ uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
@@ -162,32 +144,23 @@ jobs:
runs-on: "ubuntu-latest"
strategy:
matrix:
- python-version: [3.8]
+ python-version: ['3.9']
fail-fast: false
steps:
- - uses: actions/checkout@v2
-
- - name: Cache conda
- uses: actions/cache@v2
- env:
- # Increase this value to reset cache if needed by env file has not changed
- CACHE_NUMBER: 0
- with:
- path: ~/conda_pkgs_dir
- key:
- ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
- hashFiles('.github/test_conda_env.yml') }}
-
+ - uses: actions/checkout@v3
+
- name: Setup conda
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v2.1.1
with:
- miniconda-version: 'latest'
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
python-version: ${{ matrix.python-version }}
activate-environment: eqcorrscan-test
- environment-file: .github/test_conda_env.yml
- condarc-file: .github/test_condarc.yml
- use-only-tar-bz2: true # Must be set for caching to work properly
+ use-mamba: true
+
+ - name: Update Env
+ run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml
- name: install fmf
shell: bash -l {0}
@@ -195,7 +168,7 @@ jobs:
cd ..
git clone https://github.com/beridel/fast_matched_filter.git
cd fast_matched_filter
- pip install -e .
+ pip install -e . --no-deps
cd ../EQcorrscan
- name: install eqcorrscan
@@ -216,7 +189,7 @@ jobs:
py.test eqcorrscan/tests/correlate_test.py --cov-report=xml
- name: upload coverage
- uses: codecov/codecov-action@v1
+ uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 63dcc2610..3cd235e87 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -16,7 +16,8 @@ sphinx:
configuration: eqcorrscan/doc/conf.py
# Optionally build your docs in additional formats such as PDF and ePub
-formats: all
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html#formats
+formats: []
# Optionally set the version of Python and requirements required to build your docs
python:
diff --git a/.stickler.yml b/.stickler.yml
deleted file mode 100644
index 5e8537b49..000000000
--- a/.stickler.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-linters:
- flake8:
- python: 3
-
-files:
- ignore:
- - 'eqcorrscan/doc/*'
diff --git a/CHANGES.md b/CHANGES.md
index 74ec346bd..e49cdf69e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,4 +1,49 @@
## Current
+* core.match_filter.tribe
+ - Significant re-write of detect logic to take advantage of parallel steps (see #544)
+ - Significant re-structure of hidden functions.
+* core.match_filter.matched_filter
+  - 5x speedup for MAD threshold calculation using parallel (threaded)
+    computation (#531).
+* core.match_filter.detect
+ - 1000x speedup for retrieving unique detections for all templates.
+ - 30x speedup in handling detections (50x speedup in selecting detections,
+ 4x speedup in adding prepick time)
+* core.match_filter.template
+ - new quick_group_templates function for 50x quicker template grouping.
+ - Templates with nan channels will be considered equal to other templates with shared
+ nan channels.
+ - New grouping strategy to minimise nan-channels - templates are grouped by
+ similar seed-ids. This should speed up both correlations and
+ prep_data_for_correlation. See PR #457.
+* utils.pre_processing
+ - `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates
+  - New function `quick_trace_select` for a very efficient selection of traces
+ by seed ID without wildcards (4x speedup).
+ - `process`, `dayproc` and `shortproc` replaced by `multi_process`. Deprecation
+ warning added.
+ - `multi_process` implements multithreaded GIL-releasing parallelism of slow
+ sections (detrending, resampling and filtering) of the processing workflow.
+ Multiprocessing is no longer supported or needed for processing. See PR #540
+    for benchmarks. The new approach is slightly faster overall, and significantly
+    more memory efficient (uses c. 6x less memory than the old multiprocessing
+    approach on a 12-core machine).
+* utils.correlate
+ - 25 % speedup for `_get_array_dicts` with quicker access to properties.
+* utils.catalog_to_dd
+ - _prepare_stream
+ - Now more consistently slices templates to length = extract_len * samp_rate
+        so that the user receives fewer warnings about insufficient data.
+ - write_correlations
+ - New option `use_shared_memory` to speed up correlation of many events by
+ ca. 20 % by moving trace data into shared memory.
+ - Add ability to weight correlations by raw correlation rather than just
+ correlation squared.
+* utils.cluster.decluster_distance_time
+ - Bug-fix: fix segmentation fault when declustering more than 46340 detections
+ with hypocentral_separation.
+
+## 0.4.4
* core.match_filter
- Bug-fix: peak-cores could be defined twice in _group_detect through kwargs.
Fix: only update peak_cores if it isn't there already.
@@ -9,9 +54,17 @@
* core.lag_calc._xcorr_interp
- CC-interpolation replaced with resampling (more robust), old method
deprecated. Use new method with use_new_resamp_method=True as **kwarg.
-* core.lag_calc:
+* core.lag_calc
+ - Added new option all_vert to transfer P-picks to all channels defined as
+ vertical_chans.
+ - Made usage of all_vert, all_horiz consistent across the lag_calc.
- Fixed bug where minimum CC defined via min_cc_from_mean_cc_factor was not
set correctly for negative correlation sums.
+* core.template_gen
+ - Added new option all_vert to transfer P-picks to all channels defined as
+ vertical_chans.
+  - Made handling of horizontal_chans and vertical_chans consistent so that the
+    user can freely choose the relevant channels.
* utils.correlate
- Fast Matched Filter now supported natively for version >= 1.4.0
- Only full correlation stacks are returned now (e.g. where fewer than than
diff --git a/README.md b/README.md
index 9021d6310..bce837bbb 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
## A python package for the detection and analysis of repeating and near-repeating earthquakes.
## Citation:
-We have a manuscript on the development of EQcorrscan, if you make use of EQcorrscan please cite the folloing paper:
+We have a manuscript on the development of EQcorrscan, if you make use of EQcorrscan please cite the following paper:
Chamberlain, C. J., Hopp, C. J., Boese, C. M., Warren-Smith, E., Chambers, D., Chu, S. X., Michailos, K., Townend, J., [EQcorrscan: Repeating and near-repeating earthquake detection and analysis in Python.](https://pubs.geoscienceworld.org/ssa/srl/article/89/1/173/524875/eqcorrscan-repeating-and-near-repeating-earthquake) Seismological Research Letters *2017*
@@ -89,3 +89,16 @@ Note that tests for travis and appveyor are run daily on master as cron jobs, an
This package is written and maintained by the EQcorrscan developers,
and is distributed under the LGPL GNU License,
Copyright EQcorrscan developers 2018.
+
+
+# Funding
+
+![RCET](eqcorrscan/doc/RCET_logo_transparent.png)
+
+Continued development of the EQcorrscan package is directly supported by the
+[RCET](https://www.rcet.science/), Rapid Characterisation of Earthquakes and Tsunami
+programme funded by the New Zealand Ministry of Business, Innovation and Employment
+Endeavour fund.
+
+Development is indirectly funded by grants from [Toku Tū Ake: EQC](https://www.eqc.govt.nz/)
+and a [Rutherford Discovery Fellowship](https://www.royalsociety.org.nz/what-we-do/funds-and-opportunities/rutherford-discovery-fellowships/rutherford-discovery-fellowship-recipients/calum-chamberlain/).
diff --git a/conftest.py b/conftest.py
index 0549ad04f..4da04679d 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,5 +1,6 @@
import os
import shutil
+import glob
from os.path import join, dirname
import pytest
@@ -58,7 +59,8 @@ def clean_up_test_files():
'dt.cc2',
'dt.ct',
'dt.ct2',
- 'phase.dat'
+ 'phase.dat',
+ 'eqcorrscan_temporary_party.pkl'
]
yield
@@ -85,7 +87,11 @@ def clean_up_test_directories():
'test_tar_write',
'tmp1',
'cc_exported',
+ '.streams',
+ '.parties'
]
+ directories_to_kill.extend(glob.glob(".template_db_*"))
+ directories_to_kill.extend(glob.glob(".streams_*"))
yield
@@ -93,6 +99,7 @@ def clean_up_test_directories():
for directory in directories_to_kill:
if os.path.isdir(directory):
try:
+ print(f"Removing directory {directory}")
shutil.rmtree(directory)
except Exception as e:
print("Could not find directory, already cleaned?")
diff --git a/eqcorrscan/core/lag_calc.py b/eqcorrscan/core/lag_calc.py
index dd71efc36..09adb6740 100644
--- a/eqcorrscan/core/lag_calc.py
+++ b/eqcorrscan/core/lag_calc.py
@@ -24,6 +24,7 @@
from eqcorrscan.core.match_filter.family import Family
from eqcorrscan.core.match_filter.template import Template
from eqcorrscan.utils.plotting import plot_repicked
+from eqcorrscan.utils.pre_processing import _stream_quick_select
show_interp_deprec_warning = True
@@ -168,7 +169,8 @@ def _concatenate_and_correlate(streams, template, cores):
for i, chan in enumerate(chans):
start_index = 0
for j, stream in enumerate(streams):
- tr = stream.select(id=chan)
+ # tr = stream.select(id=chan)
+ tr = _stream_quick_select(stream, chan)
if len(tr) == 0:
# No data for this channel in this stream
used_chans[j].append(UsedChannel(
@@ -221,8 +223,9 @@ def _concatenate_and_correlate(streams, template, cores):
def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4,
min_cc_from_mean_cc_factor=None,
+ all_vert=False, all_horiz=False, vertical_chans=['Z'],
horizontal_chans=['E', 'N', '1', '2'],
- vertical_chans=['Z'], cores=1, interpolate=False,
+ cores=1, interpolate=False,
plot=False, plotdir=None, export_cc=False, cc_dir=None,
**kwargs):
"""
@@ -279,7 +282,9 @@ def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4,
picked_dict = {}
delta = family.template.st[0].stats.delta
detect_streams_dict = _prepare_data(
- family=family, detect_data=stream, shift_len=shift_len)
+ family=family, detect_data=stream, shift_len=shift_len,
+ all_vert=all_vert, all_horiz=all_horiz, vertical_chans=vertical_chans,
+ horizontal_chans=horizontal_chans)
detection_ids = list(detect_streams_dict.keys())
detect_streams = [detect_streams_dict[detection_id]
for detection_id in detection_ids]
@@ -396,7 +401,9 @@ def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4,
return picked_dict
-def _prepare_data(family, detect_data, shift_len):
+def _prepare_data(family, detect_data, shift_len, all_vert=False,
+ all_horiz=False, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2']):
"""
Prepare data for lag_calc - reduce memory here.
@@ -423,7 +430,9 @@ def _prepare_data(family, detect_data, shift_len):
"samples".format(length))
prepick = shift_len + family.template.prepick
detect_streams_dict = family.extract_streams(
- stream=detect_data, length=length, prepick=prepick)
+ stream=detect_data, length=length, prepick=prepick,
+ all_vert=all_vert, all_horiz=all_horiz, vertical_chans=vertical_chans,
+ horizontal_chans=horizontal_chans)
for key, detect_stream in detect_streams_dict.items():
# Split to remove trailing or leading masks
for i in range(len(detect_stream) - 1, -1, -1):
@@ -451,6 +460,7 @@ def _prepare_data(family, detect_data, shift_len):
def lag_calc(detections, detect_data, template_names, templates,
shift_len=0.2, min_cc=0.4, min_cc_from_mean_cc_factor=None,
+ all_vert=False, all_horiz=False,
horizontal_chans=['E', 'N', '1', '2'],
vertical_chans=['Z'], cores=1, interpolate=False,
plot=False, plotdir=None, export_cc=False, cc_dir=None, **kwargs):
@@ -597,6 +607,7 @@ def lag_calc(detections, detect_data, template_names, templates,
template_dict = xcorr_pick_family(
family=family, stream=detect_data, min_cc=min_cc,
min_cc_from_mean_cc_factor=min_cc_from_mean_cc_factor,
+ all_vert=all_vert, all_horiz=all_horiz,
horizontal_chans=horizontal_chans,
vertical_chans=vertical_chans, interpolate=interpolate,
cores=cores, shift_len=shift_len, plot=plot, plotdir=plotdir,
diff --git a/eqcorrscan/core/match_filter/detection.py b/eqcorrscan/core/match_filter/detection.py
index 5fe689db3..9ac074ddb 100644
--- a/eqcorrscan/core/match_filter/detection.py
+++ b/eqcorrscan/core/match_filter/detection.py
@@ -23,6 +23,7 @@
Origin)
from eqcorrscan.core.match_filter.helpers import _test_event_similarity
+from eqcorrscan.utils.pre_processing import _stream_quick_select
Logger = logging.getLogger(__name__)
@@ -67,10 +68,11 @@ class Detection(object):
:type id: str
:param id: Identification for detection (should be unique).
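+    :type strict: bool
+    :param strict:
+        If True (default), raise an OverflowError when a correlation
+        detection value exceeds the number of channels used; if False,
+        log an error instead.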
"""
+ _precision = 1e-5 # Used for warning about out of range correlations
def __init__(self, template_name, detect_time, no_chans, detect_val,
threshold, typeofdet, threshold_type, threshold_input,
- chans=None, event=None, id=None):
+ chans=None, event=None, id=None, strict=True):
"""Main class of Detection."""
self.template_name = template_name
self.detect_time = detect_time
@@ -93,7 +95,13 @@ def __init__(self, template_name, detect_time, no_chans, detect_val,
if event is not None:
event.resource_id = self.id
if self.typeofdet == 'corr':
- assert abs(self.detect_val) <= self.no_chans
+ if abs(self.detect_val) > self.no_chans * (1 + self._precision):
+ msg = (f"Correlation detection at {self.detect_val} exceeds "
+                       f"boundedness ({self.no_chans})")
+ if strict:
+ raise OverflowError(msg)
+ else:
+ Logger.error(msg)
def __repr__(self):
"""Simple print."""
@@ -284,7 +292,8 @@ def _calculate_event(self, template=None, template_st=None,
new_pick.phase_hint = template_pick[0].phase_hint
else:
# Multiple picks for this trace in template
- similar_traces = template_st.select(id=tr.id)
+ # similar_traces = template_st.select(id=tr.id)
+ similar_traces = _stream_quick_select(template_st, tr.id)
similar_traces.sort()
_index = similar_traces.traces.index(tr)
try:
@@ -348,7 +357,9 @@ def _calculate_event(self, template=None, template_st=None,
self.event = ev
return self
- def extract_stream(self, stream, length, prepick):
+ def extract_stream(self, stream, length, prepick, all_vert=False,
+ all_horiz=False, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2']):
"""
Extract a cut stream of a given length around the detection.
@@ -374,7 +385,17 @@ def extract_stream(self, stream, length, prepick):
pick = [
p for p in self.event.picks
if p.waveform_id.station_code == station and
- p.waveform_id.channel_code == channel]
+ p.waveform_id.channel_code[0:-1] == channel[0:-1]]
+ # Allow picks to be transferred to other vertical/horizontal chans
+ if all_vert and channel[-1] in vertical_chans:
+ pick = [p for p in pick
+ if p.waveform_id.channel_code[-1] in vertical_chans]
+ elif all_horiz and channel[-1] in horizontal_chans:
+ pick = [p for p in pick
+ if p.waveform_id.channel_code[-1] in horizontal_chans]
+ else:
+ pick = [p for p in pick
+ if p.waveform_id.channel_code == channel]
if len(pick) == 0:
Logger.info("No pick for {0}.{1}".format(station, channel))
continue
diff --git a/eqcorrscan/core/match_filter/family.py b/eqcorrscan/core/match_filter/family.py
index 332b02b8d..729a01355 100644
--- a/eqcorrscan/core/match_filter/family.py
+++ b/eqcorrscan/core/match_filter/family.py
@@ -18,14 +18,10 @@
import logging
from obspy import UTCDateTime, Stream, Catalog
-from obspy.core.event import (
- StationMagnitude, Magnitude, ResourceIdentifier, WaveformStreamID,
- CreationInfo, StationMagnitudeContribution)
-from eqcorrscan.core.match_filter.matched_filter import _group_process
+from eqcorrscan.utils.pre_processing import _group_process
from eqcorrscan.core.match_filter.detection import Detection, get_catalog
from eqcorrscan.utils.plotting import cumulative_detections
-from eqcorrscan.utils.mag_calc import relative_magnitude
Logger = logging.getLogger(__name__)
@@ -507,8 +503,8 @@ def write(self, filename, format='tar', overwrite=False):
return
def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
- min_cc_from_mean_cc_factor=None,
- horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'],
+ min_cc_from_mean_cc_factor=None, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2'],
cores=1, interpolate=False, plot=False, plotdir=None,
parallel=True, process_cores=None, ignore_length=False,
ignore_bad_data=False, export_cc=False, cc_dir=None,
@@ -766,10 +762,17 @@ def _process_streams(self, stream, pre_processed, process_cores=1,
template_stream = stream
if not pre_processed:
processed_streams = _group_process(
- template_group=[self.template], cores=process_cores,
- parallel=parallel, stream=template_stream.merge().copy(),
- daylong=False, ignore_length=ignore_length, overlap=0.0,
- ignore_bad_data=ignore_bad_data)
+ filt_order=self.template.filt_order,
+ highcut=self.template.highcut,
+ lowcut=self.template.lowcut,
+ samp_rate=self.template.samp_rate,
+ process_length=self.template.process_length,
+ parallel=parallel,
+ cores=process_cores,
+ stream=template_stream.merge().copy(),
+ daylong=False,
+ ignore_length=ignore_length,
+ overlap=0.0, ignore_bad_data=ignore_bad_data)
processed_stream = Stream()
for p in processed_streams:
processed_stream += p
@@ -779,7 +782,9 @@ def _process_streams(self, stream, pre_processed, process_cores=1,
processed_stream = stream.merge()
return processed_stream.split()
- def extract_streams(self, stream, length, prepick):
+ def extract_streams(self, stream, length, prepick, all_vert=False,
+ all_horiz=False, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2']):
"""
Generate a dictionary of cut streams around detections.
@@ -797,7 +802,9 @@ def extract_streams(self, stream, length, prepick):
"""
# Splitting and merging to remove trailing and leading masks
return {d.id: d.extract_stream(
- stream=stream, length=length, prepick=prepick).split().merge()
+ stream=stream, length=length, prepick=prepick, all_vert=all_vert,
+ all_horiz=all_horiz, vertical_chans=vertical_chans,
+ horizontal_chans=horizontal_chans).split().merge()
for d in self.detections}
diff --git a/eqcorrscan/core/match_filter/helpers/__init__.py b/eqcorrscan/core/match_filter/helpers/__init__.py
new file mode 100644
index 000000000..f212f44c2
--- /dev/null
+++ b/eqcorrscan/core/match_filter/helpers/__init__.py
@@ -0,0 +1,13 @@
+"""
+
+"""
+
+from .archive_access import ( # noqa: F401
+ _par_read, _resolved, _badpath, _badlink, _safemembers,
+ temporary_directory)
+from .matched_filter import ( # noqa: F401
+ _tr_spike_test, _spike_test, _total_microsec, _templates_match,
+ _test_event_similarity, _remove_duplicates, _moveout, _mad,
+ _pickle_stream, _unpickle_stream, extract_from_stream,
+ normxcorr2)
+from .clients import (get_waveform_client) # noqa: F401
diff --git a/eqcorrscan/core/match_filter/helpers/archive_access.py b/eqcorrscan/core/match_filter/helpers/archive_access.py
new file mode 100644
index 000000000..b0f1a01d1
--- /dev/null
+++ b/eqcorrscan/core/match_filter/helpers/archive_access.py
@@ -0,0 +1,128 @@
+"""
+Helper functions for access to archives, e.g. Party, Tribe
+
+:copyright:
+ EQcorrscan developers.
+
+:license:
+ GNU Lesser General Public License, Version 3
+ (https://www.gnu.org/copyleft/lesser.html)
+"""
+
+import contextlib
+import os
+import shutil
+import tempfile
+import tarfile
+import logging
+
+
+Logger = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def temporary_directory():
+    """Make a temporary directory, yield its name, and clean up on exit.
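+
+    A quick check of the expected behaviour:
+
+    >>> with temporary_directory() as tmp_dir:
+    ...     os.path.isdir(tmp_dir)
+    True
+    >>> os.path.isdir(tmp_dir)
+    False
+    """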
+ dir_name = tempfile.mkdtemp()
+ yield dir_name
+ if os.path.exists(dir_name):
+ shutil.rmtree(dir_name)
+
+
+def _par_read(dirname, compressed=True):
+ """
+ Internal write function to read a formatted parameter file.
+
+ :type dirname: str
+ :param dirname: Directory to read the parameter file from.
+ :type compressed: bool
+ :param compressed: Whether the directory is compressed or not.
+ """
+ from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
+ from eqcorrscan.core.match_filter.template import Template
+
+ templates = []
+ if compressed:
+ arc = tarfile.open(dirname, "r:*")
+ members = arc.getmembers()
+ _parfile = [member for member in members
+ if member.name.split(os.sep)[-1] ==
+ 'template_parameters.csv']
+ if len(_parfile) == 0:
+ arc.close()
+ raise MatchFilterError(
+ 'No template parameter file in archive')
+ parfile = arc.extractfile(_parfile[0])
+ else:
+ parfile = open(dirname + '/' + 'template_parameters.csv', 'r')
+ for line in parfile:
+ t_in = Template()
+ for key_pair in line.rstrip().split(','):
+ if key_pair.split(':')[0].strip() == 'name':
+ t_in.__dict__[key_pair.split(':')[0].strip()] = \
+ key_pair.split(':')[-1].strip()
+ elif key_pair.split(':')[0].strip() == 'filt_order':
+ try:
+ t_in.__dict__[key_pair.split(':')[0].strip()] = \
+ int(key_pair.split(':')[-1])
+ except ValueError:
+ pass
+ else:
+ try:
+ t_in.__dict__[key_pair.split(':')[0].strip()] = \
+ float(key_pair.split(':')[-1])
+ except ValueError:
+ pass
+ templates.append(t_in)
+ parfile.close()
+ if compressed:
+ arc.close()
+ return templates
+
+
+def _resolved(x):
+ return os.path.realpath(os.path.abspath(x))
+
+
+def _badpath(path, base):
+ """
+ joinpath will ignore base if path is absolute.
+ """
+ return not _resolved(os.path.join(base, path)).startswith(base)
+
+
+def _badlink(info, base):
+ """
+ Links are interpreted relative to the directory containing the link
+ """
+ tip = _resolved(os.path.join(base, os.path.dirname(info.name)))
+ return _badpath(info.linkname, base=tip)
+
+
+def _safemembers(members):
+ """
+ Check members of a tar archive for safety.
+
+ Ensure that they do not contain paths or links outside of where we
+ need them - this would only happen if the archive wasn't made by
+ eqcorrscan.
+
+ :type members: :class:`tarfile.TarFile`
+ :param members: an open tarfile.
+ """
+ base = _resolved(".")
+
+ for finfo in members:
+ assert not _badpath(finfo.name, base), \
+ f"{finfo.name} is blocked (illegal path)"
+ if finfo.issym() or finfo.islnk():
+ assert not _badlink(finfo, base), \
+ f"{finfo.name} is blocked: Link to {finfo.linkname}"
+ else:
+ yield finfo
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
diff --git a/eqcorrscan/core/match_filter/helpers/clients.py b/eqcorrscan/core/match_filter/helpers/clients.py
new file mode 100644
index 000000000..2360d7ae9
--- /dev/null
+++ b/eqcorrscan/core/match_filter/helpers/clients.py
@@ -0,0 +1,45 @@
+"""
+Helper functions for seismic data clients.
+
+:copyright:
+ EQcorrscan developers.
+
+:license:
+ GNU Lesser General Public License, Version 3
+ (https://www.gnu.org/copyleft/lesser.html)
+"""
+import logging
+
+from obspy import Stream
+
+
+Logger = logging.getLogger(__name__)
+
+
+def get_waveform_client(waveform_client):
+ """
+ Bind a `get_waveforms_bulk` method to client if it doesn't already have one
+
+ :param waveform_client: Obspy client with a `get_waveforms` method
+
+ :returns: waveform_client with `get_waveforms_bulk`.
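+
+    .. rubric:: Example
+
+    A minimal sketch using a stand-in client class (hypothetical, for
+    illustration only; any object with a ``get_waveforms`` method works):
+
+    >>> class _DummyClient:
+    ...     def get_waveforms(self, *args, **kwargs):
+    ...         return Stream()
+    >>> client = get_waveform_client(_DummyClient())
+    >>> hasattr(client, "get_waveforms_bulk")
+    True
+    >>> bulk = [("NZ", "FOZ", "10", "HHZ", None, None)]
+    >>> st = client.get_waveforms_bulk(bulk)
+    >>> len(st)
+    0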
+ """
+ def _get_waveforms_bulk_naive(self, bulk_arg):
+ """ naive implementation of get_waveforms_bulk that uses iteration. """
+ st = Stream()
+ for arg in bulk_arg:
+ st += self.get_waveforms(*arg)
+ return st
+
+ # add waveform_bulk method dynamically if it doesn't exist already
+ if not hasattr(waveform_client, "get_waveforms_bulk"):
+ bound_method = _get_waveforms_bulk_naive.__get__(waveform_client)
+ setattr(waveform_client, "get_waveforms_bulk", bound_method)
+
+ return waveform_client
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
diff --git a/eqcorrscan/core/match_filter/helpers.py b/eqcorrscan/core/match_filter/helpers/matched_filter.py
similarity index 71%
rename from eqcorrscan/core/match_filter/helpers.py
rename to eqcorrscan/core/match_filter/helpers/matched_filter.py
index ef8b4c319..26abd5e48 100644
--- a/eqcorrscan/core/match_filter/helpers.py
+++ b/eqcorrscan/core/match_filter/helpers/matched_filter.py
@@ -8,14 +8,14 @@
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
-import contextlib
+
import os
-import shutil
-import tarfile
-import tempfile
import logging
+import pickle
import numpy as np
+
+from concurrent.futures import ThreadPoolExecutor
from obspy import Stream
from obspy.core.event import Event
@@ -25,36 +25,13 @@
Logger = logging.getLogger(__name__)
-@contextlib.contextmanager
-def temporary_directory():
- """ make a temporary directory, yeild its name, cleanup on exit """
- dir_name = tempfile.mkdtemp()
- yield dir_name
- if os.path.exists(dir_name):
- shutil.rmtree(dir_name)
-
-
-def get_waveform_client(waveform_client):
- """
- Bind a `get_waveforms_bulk` method to client if it doesn't already have one
-
- :param waveform_client: Obspy client with a `get_waveforms` method
-
- :returns: waveform_client with `get_waveforms_bulk`.
- """
- def _get_waveforms_bulk_naive(self, bulk_arg):
- """ naive implementation of get_waveforms_bulk that uses iteration. """
- st = Stream()
- for arg in bulk_arg:
- st += self.get_waveforms(*arg)
- return st
-
- # add waveform_bulk method dynamically if it doesn't exist already
- if not hasattr(waveform_client, "get_waveforms_bulk"):
- bound_method = _get_waveforms_bulk_naive.__get__(waveform_client)
- setattr(waveform_client, "get_waveforms_bulk", bound_method)
-
- return waveform_client
+def _tr_spike_test(data, percent, multiplier):
+ data_len = data.shape[0]
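+    # Threshold is `multiplier` times twice the approximate `percent`
+    # quantile of the absolute amplitudes of this trace.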
+ thresh = 2 * np.max(np.sort(
+ np.abs(data))[0:np.int64(percent * data_len)]) * multiplier
+ if (data > thresh).sum() > 0:
+ return True
+ return False
def _spike_test(stream, percent=0.99, multiplier=1e7):
@@ -70,13 +47,15 @@ def _spike_test(stream, percent=0.99, multiplier=1e7):
"""
from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
+ to_check = ((tr.data, percent, multiplier) for tr in stream)
list_ids = []
- for tr in stream:
- if (tr.data > 2 * np.max(np.sort(
- np.abs(tr.data))[0:int(percent * len(tr.data))]
- ) * multiplier).sum() > 0:
- list_ids.append(tr.id)
- if list_ids != []:
+ with ThreadPoolExecutor() as executor:
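+        # The numpy calls in _tr_spike_test largely release the GIL, so
+        # threads give a genuine speedup here without copying trace data.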
+ for tr, spiked in zip(stream, executor.map(
+ lambda args: _tr_spike_test(*args), to_check)):
+ if spiked:
+ list_ids.append(tr.id)
+
+ if len(list_ids):
ids = ', '.join(list_ids)
msg = ('Spikes above ' + str(multiplier) +
' of the range of ' + str(percent) +
@@ -245,112 +224,71 @@ def _test_event_similarity(event_1, event_2, verbose=False, shallow=False):
' %s' % (amp_1[key], amp_2[key], key))
return False
elif key == "waveform_id":
- if pick_1[key].station_code != pick_2[key].station_code:
+ if amp_1[key].station_code != amp_2[key].station_code:
if verbose:
print('Station codes do not match')
return False
- if pick_1[key].channel_code[0] != pick_2[key].channel_code[0]:
+ if amp_1[key].channel_code[0] != amp_2[key].channel_code[0]:
if verbose:
print('Channel codes do not match')
return False
- if pick_1[key].channel_code[-1] != \
- pick_2[key].channel_code[-1]:
+ if amp_1[key].channel_code[-1] != \
+ amp_2[key].channel_code[-1]:
if verbose:
print('Channel codes do not match')
return False
return True
-def _par_read(dirname, compressed=True):
- """
- Internal write function to read a formatted parameter file.
-
- :type dirname: str
- :param dirname: Directory to read the parameter file from.
- :type compressed: bool
- :param compressed: Whether the directory is compressed or not.
- """
- from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
- from eqcorrscan.core.match_filter.template import Template
-
- templates = []
- if compressed:
- arc = tarfile.open(dirname, "r:*")
- members = arc.getmembers()
- _parfile = [member for member in members
- if member.name.split(os.sep)[-1] ==
- 'template_parameters.csv']
- if len(_parfile) == 0:
- arc.close()
- raise MatchFilterError(
- 'No template parameter file in archive')
- parfile = arc.extractfile(_parfile[0])
- else:
- parfile = open(dirname + '/' + 'template_parameters.csv', 'r')
- for line in parfile:
- t_in = Template()
- for key_pair in line.rstrip().split(','):
- if key_pair.split(':')[0].strip() == 'name':
- t_in.__dict__[key_pair.split(':')[0].strip()] = \
- key_pair.split(':')[-1].strip()
- elif key_pair.split(':')[0].strip() == 'filt_order':
- try:
- t_in.__dict__[key_pair.split(':')[0].strip()] = \
- int(key_pair.split(':')[-1])
- except ValueError:
- pass
- else:
- try:
- t_in.__dict__[key_pair.split(':')[0].strip()] = \
- float(key_pair.split(':')[-1])
- except ValueError:
- pass
- templates.append(t_in)
- parfile.close()
- if compressed:
- arc.close()
- return templates
-
-
-def _resolved(x):
- return os.path.realpath(os.path.abspath(x))
-
-
-def _badpath(path, base):
- """
- joinpath will ignore base if path is absolute.
- """
- return not _resolved(os.path.join(base, path)).startswith(base)
-
-
-def _badlink(info, base):
+def _remove_duplicates(party):
+ for family in party:
+ if family is not None:
+ # Slow uniq:
+ # family.detections = family._uniq().detections
+ # Very quick uniq:
+ det_tuples = [
+ (det.id, str(det.detect_time), det.detect_val)
+ for det in family]
+ # Retrieve the indices for the first occurrence of each
+ # detection in the family (so only unique detections will
+ # remain).
+ uniq_det_tuples, uniq_det_indices = np.unique(
+ det_tuples, return_index=True, axis=0)
+ uniq_detections = []
+ for uniq_det_index in uniq_det_indices:
+ uniq_detections.append(family[uniq_det_index])
+ family.detections = uniq_detections
+ return party
+
+
+def _moveout(st: Stream) -> float:
+ """ Maximum moveout across template in seconds. """
+ return max(tr.stats.starttime for tr in st) - min(
+ tr.stats.starttime for tr in st)
+
+
+def _mad(cccsum):
"""
- Links are interpreted relative to the directory containing the link
+ Internal helper to compute MAD-thresholds in parallel.
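+
+    A tiny worked example (median absolute value of the correlation sum):
+
+    >>> float(_mad(np.array([-1.0, 0.0, 1.0])))
+    1.0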
"""
- tip = _resolved(os.path.join(base, os.path.dirname(info.name)))
- return _badpath(info.linkname, base=tip)
+ return np.median(np.abs(cccsum))
-def _safemembers(members):
- """Check members of a tar archive for safety.
- Ensure that they do not contain paths or links outside of where we
- need them - this would only happen if the archive wasn't made by
- eqcorrscan.
+def _pickle_stream(stream: Stream, filename: str):
+ Logger.info(f"Pickling stream of {len(stream)} traces to {filename}")
+ with open(filename, "wb") as f:
+ pickle.dump(stream, f)
+ Logger.info(f"Pickled to {filename}")
+ return
- :type members: :class:`tarfile.TarFile`
- :param members: an open tarfile.
- """
- base = _resolved(".")
- for finfo in members:
- if _badpath(finfo.name, base):
- print(finfo.name, "is blocked (illegal path)")
- elif finfo.issym() and _badlink(finfo, base):
- print(finfo.name, "is blocked: Hard link to", finfo.linkname)
- elif finfo.islnk() and _badlink(finfo, base):
- print(finfo.name, "is blocked: Symlink to", finfo.linkname)
- else:
- yield finfo
+def _unpickle_stream(filename: str) -> Stream:
+ Logger.info(f"Unpickling from {filename}")
+ with open(filename, "rb") as f:
+ stream = pickle.load(f)
+ assert isinstance(stream, Stream)
+ Logger.info(f"Unpickled stream of {len(stream)} traces from {filename}")
+ return stream
def extract_from_stream(stream, detections, pad=5.0, length=30.0):
@@ -413,7 +351,8 @@ def normxcorr2(template, image):
"""
array_xcorr = get_array_xcorr()
# Check that we have been passed numpy arrays
- if type(template) != np.ndarray or type(image) != np.ndarray:
+ if (not isinstance(template, np.ndarray)
+ or not isinstance(image, np.ndarray)):
Logger.error(
'You have not provided numpy arrays, I will not convert them')
return 'NaN'
diff --git a/eqcorrscan/core/match_filter/helpers/processes.py b/eqcorrscan/core/match_filter/helpers/processes.py
new file mode 100644
index 000000000..7a85945c0
--- /dev/null
+++ b/eqcorrscan/core/match_filter/helpers/processes.py
@@ -0,0 +1,708 @@
+"""
+Functions for network matched-filter detection of seismic data.
+
+Designed to cross-correlate templates generated by template_gen function
+with data and output the detections.
+
+:copyright:
+ EQcorrscan developers.
+
+:license:
+ GNU Lesser General Public License, Version 3
+ (https://www.gnu.org/copyleft/lesser.html)
+"""
+import os
+import tempfile
+import time
+import traceback
+import logging
+import numpy as np
+
+from typing import List, Union, Iterable
+from timeit import default_timer
+
+from multiprocessing import Queue
+from queue import Empty
+
+from obspy import Stream
+
+from eqcorrscan.core.match_filter.helpers import (
+ _pickle_stream, _unpickle_stream)
+from eqcorrscan.core.match_filter.helpers.tribe import (
+ _download_st, _pre_process, _group, _detect,
+ _read_template_db, _make_party)
+
+from eqcorrscan.utils.correlate import (
+ _get_array_dicts, _fmf_stabilisation, _fmf_reshape)
+from eqcorrscan.utils.pre_processing import (
+ _quick_copy_stream, _prep_data_for_correlation)
+
+
+Logger = logging.getLogger(__name__)
+
+
+###############################################################################
+# Process handlers
+###############################################################################
+
+
+class Poison(Exception):
+ """
+ Exception passing within EQcorrscan
+
+ :type value: Exception
+ :param value: Exception to pass between processes
+ """
+ def __init__(self, value):
+ """
+ Poison Exception.
+ """
+ self.value = value
+
+ def __repr__(self):
+ return f"Poison({self.value.__repr__()})"
+
+ def __str__(self):
+ """
+ >>> print(Poison(Exception('alf')))
+ Poison(Exception('alf'))
+ """
+ return self.__repr__()
+
+
+def _get_and_check(input_queue: Queue, poison_queue: Queue, step: float = 0.5):
+ """
+ Get from a queue and check for poison - returns Poisoned if poisoned.
+
+ :param input_queue: Queue to get something from
+ :param poison_queue: Queue to check for poison
+
+ :return: Item from queue or Poison.
+ """
+ while True:
+ poison = _check_for_poison(poison_queue)
+ if poison:
+ return poison
+ if input_queue.empty():
+ time.sleep(step)
+ else:
+ return input_queue.get_nowait()
+
+
+def _check_for_poison(poison_queue: Queue) -> Union[Poison, None]:
+ """
+ Check if poison has been added to the queue.
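+
+    A small sketch of the expected behaviour:
+
+    >>> q = Queue()
+    >>> _check_for_poison(q) is None
+    True
+    >>> q.put(Exception("boom")); time.sleep(0.5)
+    >>> isinstance(_check_for_poison(q), Poison)
+    True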
+ """
+ Logger.debug("Checking for poison")
+ try:
+ poison = poison_queue.get_nowait()
+ except Empty:
+ return
+ # Put the poison back in the queue for another process to check on
+ Logger.error("Poisoned")
+ poison_queue.put(poison)
+ return Poison(poison)
+
+
+def _wait_on_output_to_be_available(
+ poison_queue: Queue,
+ output_queue: Queue,
+ raise_exception: bool = False,
+ item=None,
+ wait_warning: float = 60,
+) -> Union[Poison, None]:
+ """
+ Wait until the output queue is not full to put something in it.
+
+ :param poison_queue: Queue to put or containing poison
+ :param output_queue:
+ Output Queue to check whether we can put something in it
+ :param item: Thing to put in the queue when we can
+ :param raise_exception:
+ Whether to raise an exception on poison (True), or pass back (False)
+
+ :return: Poison if poisoned, or None if all okay
+
+ .. rubric:: Example
+
+ >>> from multiprocessing import Queue, Process
+ >>> poison_queue, output_queue = Queue(), Queue(maxsize=1)
+ >>> output_queue.put("Stopper")
+ >>> process = Process(
+ ... target=_wait_on_output_to_be_available,
+ ... kwargs={"poison_queue": poison_queue,
+ ... "output_queue": output_queue,
+ ... "raise_exception": True,
+ ... "item": "Carry on",
+ ... "wait_warning": 5})
+ >>> process.start()
+ >>> time.sleep(7)
+ >>> poison_queue.put(
+ ... Poison(Exception("cyanide")))
+ >>> time.sleep(7)
+ >>> process.is_alive()
+ False
+ >>> process.join()
+ """
+ killed = _check_for_poison(poison_queue)
+ # Wait until output queue is empty to limit rate and memory use
+ tic = default_timer()
+ while output_queue.full():
+ # Keep on checking while we wait
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ break
+ waited = default_timer() - tic
+ if waited > wait_warning:
+ Logger.debug("Waiting for output_queue to not be full")
+ tic = default_timer()
+ if not killed and item:
+ output_queue.put_nowait(item)
+ elif killed and raise_exception:
+ raise killed
+ return killed
+
+
+def _get_detection_stream(
+ template_channel_ids: List[tuple],
+ client,
+ input_time_queue: Queue,
+ retries: int,
+ min_gap: float,
+ buff: float,
+ output_filename_queue: Queue,
+ poison_queue: Queue,
+ temp_stream_dir: str,
+ full_stream_dir: str = None,
+ pre_process: bool = False,
+ parallel_process: bool = True,
+ process_cores: int = None,
+ daylong: bool = False,
+ overlap: Union[str, float] = "calculate",
+ ignore_length: bool = False,
+ ignore_bad_data: bool = False,
+ filt_order: int = None,
+ highcut: float = None,
+ lowcut: float = None,
+ samp_rate: float = None,
+ process_length: float = None,
+):
+ """
+ Get a stream to be used for detection from a client for a time period.
+
+ This function is designed to be run continuously within a Process and will
+ only stop when the next item in the input_time_queue is None.
+
+ This function uses .get_waveforms_bulk to get a Stream from a Client.
+ The specific time period to get data for is read from the input_time_queue.
+ Once the data have been loaded into memory from the Client, this function
+ then processes that Stream according to the processing parameters passed
+ as arguments to this function. Finally, this function writes the processed
+ Stream to disk in a temporary stream directory and puts the filename
+ for that stream in the output_filename_queue.
+
+ Optionally, an unprocessed version of the stream can be written to the
+ full_stream_dir directory to later provide an unprocessed copy of the
+ raw data. This can be helpful if data downloading is slow and the stream
+ is required for subsequent processing.
+
+ :param template_channel_ids:
+ Iterable of (network, station, location, channel) tuples to get data
+ for. Wildcards may be used if accepted by the client.
+ :param client:
+ Client-like object with at least a .get_waveforms_bulk method.
+ :param input_time_queue:
+ Queue of (starttime, endtime) tuples of UTCDateTimes to get data
+ between.
+ :param retries: See core.match_filter.tribe.client_detect
+ :param min_gap: See core.match_filter.tribe.client_detect
+ :param buff:
+ Length to pad downloaded data by - some clients do not provide all
+ data requested.
+ :param output_filename_queue:
+ Queue to put filenames of written streams into
+ :param poison_queue:
+ Queue to check for poison, or put poison into if something goes awry
+ :param temp_stream_dir:
+ Directory to write processed streams to.
+ :param full_stream_dir:
+ Directory to save unprocessed streams to. If None, will not be used.
+ :param pre_process: Whether to run pre-processing or not.
+ :param parallel_process:
+ Whether to process data in parallel (uses multi-threading)
+ :param process_cores:
+ Maximum number of cores to use for parallel processing
+ :param daylong: See utils.pre_processing.multi_process
+ :param overlap: See core.match_filter.tribe.detect
+ :param ignore_length: See utils.pre_processing.multi_process
+ :param ignore_bad_data: See utils.pre_processing.multi_process
+ :param filt_order: See utils.pre_processing.multi_process
+ :param highcut: See utils.pre_processing.multi_process
+ :param lowcut: See utils.pre_processing.multi_process
+ :param samp_rate: See utils.pre_processing.multi_process
+ :param process_length: See utils.pre_processing.multi_process
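+
+    .. rubric:: Sketch of the input queue protocol
+
+    The time queue carries (starttime, endtime) tuples and is terminated
+    with ``None`` (illustrative values only):
+
+    >>> from obspy import UTCDateTime
+    >>> input_times = Queue()
+    >>> input_times.put((UTCDateTime(2021, 1, 1), UTCDateTime(2021, 1, 2)))
+    >>> input_times.put(None)  # sentinel: no more time periods to download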
+ """
+ while True:
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue, output_queue=output_filename_queue,
+ item=False)
+ if killed:
+ Logger.error("Killed")
+ break
+ try:
+ next_times = _get_and_check(input_time_queue, poison_queue)
+ if next_times is None:
+ break
+ if isinstance(next_times, Poison):
+ Logger.error("Killed")
+ break
+ starttime, endtime = next_times
+
+ st = _download_st(
+ starttime=starttime, endtime=endtime, buff=buff,
+ min_gap=min_gap, template_channel_ids=template_channel_ids,
+ client=client, retries=retries)
+ if len(st) == 0:
+ Logger.warning(f"No suitable data between {starttime} "
+ f"and {endtime}, skipping")
+ continue
+ Logger.info(f"Downloaded stream of {len(st)} traces:")
+ for tr in st:
+ Logger.info(tr)
+ # Try to reduce memory consumption by getting rid of st if we can
+ if full_stream_dir:
+ for tr in st:
+ tr.split().write(os.path.join(
+ full_stream_dir,
+ f"full_trace_{tr.id}_"
+ f"{tr.stats.starttime.strftime('%y-%m-%dT%H-%M-%S')}"
+ f".ms"), format="MSEED")
+ if not pre_process:
+ st_chunks = [st]
+ else:
+ template_ids = set(['.'.join(sid)
+ for sid in template_channel_ids])
+ # Group_process copies the stream.
+ st_chunks = _pre_process(
+ st=st, template_ids=template_ids, pre_processed=False,
+ filt_order=filt_order, highcut=highcut,
+ lowcut=lowcut, samp_rate=samp_rate,
+ process_length=process_length,
+ parallel=parallel_process, cores=process_cores,
+ daylong=daylong, ignore_length=ignore_length,
+ overlap=overlap, ignore_bad_data=ignore_bad_data)
+ # We don't need to hold on to st!
+ del st
+ for chunk in st_chunks:
+ Logger.info(f"After processing stream has {len(chunk)} traces:")
+ for tr in chunk:
+ Logger.info(tr)
+ if not os.path.isdir(temp_stream_dir):
+ os.makedirs(temp_stream_dir)
+ chunk_file = os.path.join(
+ temp_stream_dir,
+ f"chunk_{len(chunk)}_"
+ f"{chunk[0].stats.starttime.strftime('%Y-%m-%dT%H-%M-%S')}"
+ f"_{os.getpid()}.pkl")
+ # Add PID to cope with multiple instances operating at once
+ _pickle_stream(chunk, chunk_file)
+ # Wait for output queue to be ready
+ _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_filename_queue,
+ item=chunk_file, raise_exception=True)
+ del chunk
+ except Exception as e:
+ Logger.error(f"Caught exception {e} in downloader")
+ poison_queue.put(Poison(e))
+ traceback.print_tb(e.__traceback__)
+ break
+ # Wait for output queue to be ready
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_filename_queue,
+ raise_exception=False)
+ if killed:
+ poison_queue.put_nowait(killed)
+ else:
+ output_filename_queue.put(None)
+ return
+
+
+def _pre_processor(
+ input_stream_queue: Queue,
+ temp_stream_dir: str,
+ template_ids: set,
+ pre_processed: bool,
+ filt_order: int,
+ highcut: float,
+ lowcut: float,
+ samp_rate: float,
+ process_length: float,
+ parallel: bool,
+ cores: int,
+ daylong: bool,
+ ignore_length: bool,
+ overlap: float,
+ ignore_bad_data: bool,
+ output_filename_queue: Queue,
+ poison_queue: Queue,
+):
+ """
+ Consume a queue of input streams and process those streams.
+
+ This function is designed to be run continuously within a Process and will
+ only stop when the next item in the input_stream_queue is None.
+
+ This function consumes streams from the input_stream_queue and processes
+ them using utils.pre_processing functions. Processed streams are written
+ out to the temp_stream_dir and the filenames are produced in the
+ output_filename_queue.
+
+ :param input_stream_queue:
+ Input Queue to consume streams from.
+ :param temp_stream_dir: Directory to write processed streams to.
+ :param template_ids:
+ Iterable of seed ids in the template set. Only channels matching these
+ seed ids will be retained.
+ :param pre_processed: See core.match_filter.tribe.detect
+ :param filt_order: See utils.pre_processing.multi_process
+ :param highcut: See utils.pre_processing.multi_process
+ :param lowcut: See utils.pre_processing.multi_process
+ :param samp_rate: See utils.pre_processing.multi_process
+ :param process_length: See utils.pre_processing.multi_process
+ :param parallel: See utils.pre_processing.multi_process
+ :param cores: See utils.pre_processing.multi_process
+ :param daylong: See utils.pre_processing.multi_process
+ :param ignore_length: See utils.pre_processing.multi_process
+ :param overlap: See core.match_filter.tribe.detect
+ :param ignore_bad_data: See utils.pre_processing.multi_process
+ :param output_filename_queue:
+ Queue to put filenames of written streams into
+ :param poison_queue:
+ Queue to check for poison, or put poison into if something goes awry
+ """
+ while True:
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ break
+ Logger.debug("Getting stream from queue")
+ st = _get_and_check(input_stream_queue, poison_queue)
+ if st is None:
+ Logger.info("Ran out of streams, stopping processing")
+ break
+ elif isinstance(st, Poison):
+ Logger.error("Killed")
+ break
+ if len(st) == 0:
+ break
+ Logger.info(f"Processing stream:\n{st}")
+
+ # Process stream
+ try:
+ st_chunks = _pre_process(
+ st, template_ids, pre_processed, filt_order, highcut, lowcut,
+ samp_rate, process_length, parallel, cores, daylong,
+ ignore_length, ignore_bad_data, overlap)
+ for chunk in st_chunks:
+ if not os.path.isdir(temp_stream_dir):
+ os.makedirs(temp_stream_dir)
+ chunk_file = os.path.join(
+ temp_stream_dir,
+ f"chunk_{len(chunk)}_"
+ f"{chunk[0].stats.starttime.strftime('%Y-%m-%dT%H-%M-%S')}"
+ f"_{os.getpid()}.pkl")
+ # Add PID to cope with multiple instances operating at once
+ _pickle_stream(chunk, chunk_file)
+ # Wait for output queue to be ready
+ _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_filename_queue,
+ item=chunk_file, raise_exception=True)
+ del chunk
+ except Exception as e:
+ Logger.error(
+ f"Caught exception in processor:\n {e}")
+ poison_queue.put_nowait(Poison(e))
+ traceback.print_tb(e.__traceback__)
+ # Wait for output queue to be ready
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_filename_queue,
+ raise_exception=False)
+ if killed:
+ poison_queue.put_nowait(killed)
+ else:
+ output_filename_queue.put_nowait(None)
+ return
+
+
+def _prepper(
+ input_stream_filename_queue: Queue,
+ templates: Union[List, dict],
+ group_size: int,
+ groups: Iterable[Iterable[str]],
+ output_queue: Queue,
+ poison_queue: Queue,
+ xcorr_func: str = None,
+):
+ """
+ Prepare templates and stream for correlation.
+
+ This function is designed to be run continuously within a Process and will
+ only stop when the next item in the input_stream_queue is None.
+
+ This function prepares (reshapes into numpy arrays) templates and streams
+ and ensures that the data are suitable for the cross-correlation function
+ specified.
+
+ :param input_stream_filename_queue:
+ Input Queue to consume stream_filenames from.
+ :param templates:
+ Either (a) a list of Template objects, or (b) a dictionary of pickled
+ template filenames, keyed by template name.
+ :param group_size:
+ See core.match_filter.tribe.detect
+ :param groups:
+ Iterable of groups, where each group is an iterable of the template
+ names in that group.
+ :param output_queue:
+ Queue to produce inputs for correlation to.
+ :param poison_queue:
+ Queue to check for poison, or put poison into if something goes awry
+ :param xcorr_func:
+ Name of correlation function backend to be used.
+ """
+ if isinstance(templates, dict):
+ # We have been passed a db of template files on disk
+ Logger.info("Deserializing templates from disk")
+ try:
+ templates = _read_template_db(templates)
+ except Exception as e:
+ Logger.error(f"Could not read from db due to {e}")
+ poison_queue.put_nowait(Poison(e))
+ return
+
+ while True:
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ Logger.info("Killed in prepper")
+ break
+ Logger.info("Getting stream from queue")
+ st_file = _get_and_check(input_stream_filename_queue, poison_queue)
+ if st_file is None:
+ Logger.info("Got None for stream, prepper complete")
+ break
+ elif isinstance(st_file, Poison):
+ Logger.error("Killed")
+ break
+ if isinstance(st_file, Stream):
+ Logger.info("Stream provided")
+ st = st_file
+ # Write temporary cache of file
+ st_file = tempfile.NamedTemporaryFile().name
+ Logger.info(f"Writing temporary stream file to {st_file}")
+ try:
+ _pickle_stream(st, st_file)
+ except Exception as e:
+ Logger.error(
+ f"Could not write temporary file {st_file} due to {e}")
+ poison_queue.put_nowait(Poison(e))
+ break
+ Logger.info(f"Reading stream from {st_file}")
+ try:
+ st = _unpickle_stream(st_file)
+ except Exception as e:
+ Logger.error(f"Error reading {st_file}: {e}")
+ poison_queue.put_nowait(Poison(e))
+ break
+ st_sids = {tr.id for tr in st}
+ if len(st_sids) < len(st):
+ _sids = [tr.id for tr in st]
+ _duplicate_sids = {
+ sid for sid in st_sids if _sids.count(sid) > 1}
+ poison_queue.put_nowait(Poison(NotImplementedError(
+ f"Multiple channels in continuous data for "
+ f"{', '.join(_duplicate_sids)}")))
+ break
+ # Do the grouping for this stream
+ Logger.info(f"Grouping {len(templates)} templates into groups "
+ f"of {group_size} templates")
+ try:
+ template_groups = _group(sids=st_sids, templates=templates,
+ group_size=group_size, groups=groups)
+ except Exception as e:
+ Logger.error(e)
+ poison_queue.put_nowait(Poison(e))
+ break
+ Logger.info(f"Grouped into {len(template_groups)} groups")
+ for i, template_group in enumerate(template_groups):
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ break
+ try:
+ template_streams = [
+ _quick_copy_stream(t.st) for t in template_group]
+ template_names = [t.name for t in template_group]
+
+ # template_names, templates = zip(*template_group)
+ Logger.info(
+ f"Prepping {len(template_streams)} "
+ f"templates for correlation")
+ # We can just load in a fresh copy of the stream!
+ _st, template_streams, template_names = \
+ _prep_data_for_correlation(
+ stream=_unpickle_stream(st_file).merge(),
+ templates=template_streams,
+ template_names=template_names)
+ if len(_st) == 0:
+ Logger.error(
+ f"No traces returned from correlation prep: {_st}")
+ continue
+ starttime = _st[0].stats.starttime
+
+ if xcorr_func in (None, "fmf", "fftw"):
+ array_dict_tuple = _get_array_dicts(
+ template_streams, _st, stack=True)
+ stream_dict, template_dict, pad_dict, \
+ seed_ids = array_dict_tuple
+ if xcorr_func == "fmf":
+ Logger.info("Prepping data for FMF")
+ # Work out used channels here
+ tr_chans = np.array(
+ [~np.isnan(template_dict[seed_id]).any(axis=1)
+ for seed_id in seed_ids])
+ no_chans = np.sum(np.array(tr_chans).astype(int),
+ axis=0)
+                        chans = [[] for _ in range(len(template_names))]
+ for seed_id, tr_chan in zip(seed_ids, tr_chans):
+ for chan, state in zip(chans, tr_chan):
+ if state:
+ chan.append((seed_id.split('.')[1],
+ seed_id.split('.')[-1].split(
+ '_')[0]))
+ # Reshape
+ t_arr, d_arr, weights, pads = _fmf_reshape(
+ template_dict=template_dict,
+ stream_dict=stream_dict,
+ pad_dict=pad_dict, seed_ids=seed_ids)
+ # Stabilise
+ t_arr, d_arr, multipliers = _fmf_stabilisation(
+ template_arr=t_arr, data_arr=d_arr)
+ # Wait for output queue to be ready
+ _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_queue,
+ item=(starttime, i, d_arr, template_names, t_arr,
+ weights, pads, chans, no_chans),
+ raise_exception=True)
+ else:
+ Logger.info("Prepping data for FFTW")
+ # Wait for output queue to be ready
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue,
+ output_queue=output_queue,
+ item=(starttime, i, stream_dict, template_names,
+ template_dict, pad_dict, seed_ids),
+ raise_exception=True)
+ else:
+ Logger.info("Prepping data for standard correlation")
+ # Wait for output queue to be ready
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue, output_queue=output_queue,
+ item=(starttime, i, _st, template_names,
+ template_streams),
+ raise_exception=True)
+ except Exception as e:
+ Logger.error(f"Caught exception in Prepper: {e}")
+ traceback.print_tb(e.__traceback__)
+ poison_queue.put_nowait(Poison(e))
+ i += 1
+ Logger.info(f"Removing temporary {st_file}")
+ os.remove(st_file)
+ # Wait for output queue to be ready
+ killed = _wait_on_output_to_be_available(
+ poison_queue=poison_queue, output_queue=output_queue,
+ raise_exception=False)
+ if killed:
+ poison_queue.put_nowait(killed)
+ else:
+ output_queue.put_nowait(None)
+ return
+
+
+def _make_detections(
+ input_queue: Queue,
+ delta: float,
+ templates: Union[List, dict],
+ threshold: float,
+ threshold_type: str,
+ save_progress: bool,
+ output_queue: Queue,
+ poison_queue: Queue,
+):
+ """
+ Construct Detection objects from sparse detection information.
+
+ This function is designed to be run continuously within a Process and will
+ only stop when the next item in the input_queue is None.
+
+ :param input_queue:
+ Queue of (starttime, peaks, thresholds, no_channels, channels,
+ template_names). Detections are made within `peaks`.
+ :param delta:
+        Sample interval (in seconds) of the data the peaks were found in.
+ :param templates:
+ Template objects included in input_queue
+ :param threshold:
+ Overall threshold
+ :param threshold_type:
+ Overall threshold type
+ :param save_progress:
+        Whether to save progress or not: if True, individual Party files
+        will be written each time this is run.
+ :param output_queue:
+ Queue of output Party filenames.
+ :param poison_queue:
+ Queue to check for poison, or put poison into if something goes awry
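+
+    .. rubric:: Example input item
+
+    Each item consumed from `input_queue` is a tuple of the form
+    ``(starttime, all_peaks, thresholds, no_chans, chans, template_names)``;
+    for illustration (all values below are placeholders)::
+
+        (UTCDateTime(2023, 1, 1),  # chunk starttime
+         [[(0.65, 250)]],  # peaks per template: (detect_val, sample index)
+         [6.5],  # threshold per template
+         [8],  # number of channels per template
+         [[("FOZ", "HZ")]],  # (station, channel) pairs per template
+         ["template_1"])  # template names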
+ """
+ chunk_id = 0
+ while True:
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ break
+ try:
+ next_item = _get_and_check(input_queue, poison_queue)
+ if next_item is None:
+ Logger.info("_make_detections got None, stopping")
+ break
+ elif isinstance(next_item, Poison):
+ Logger.error("Killed")
+ break
+ starttime, all_peaks, thresholds, no_chans, \
+ chans, template_names = next_item
+ detections = _detect(
+ template_names=template_names, all_peaks=all_peaks,
+ starttime=starttime, delta=delta, no_chans=no_chans,
+ chans=chans, thresholds=thresholds)
+            Logger.info(f"Built {len(detections)} detections")
+ chunk_file = _make_party(
+ detections=detections, threshold=threshold,
+ threshold_type=threshold_type, templates=templates,
+ chunk_start=starttime, chunk_id=chunk_id,
+ save_progress=save_progress)
+ chunk_id += 1
+ output_queue.put_nowait(chunk_file)
+ except Exception as e:
+ Logger.error(
+ f"Caught exception in detector:\n {e}")
+ traceback.print_tb(e.__traceback__)
+ poison_queue.put_nowait(Poison(e))
+ output_queue.put_nowait(None)
+ return
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
diff --git a/eqcorrscan/core/match_filter/helpers/tribe.py b/eqcorrscan/core/match_filter/helpers/tribe.py
new file mode 100644
index 000000000..9786a68fc
--- /dev/null
+++ b/eqcorrscan/core/match_filter/helpers/tribe.py
@@ -0,0 +1,693 @@
+"""
+Functions for network matched-filter detection of seismic data.
+
+Designed to cross-correlate templates generated by template_gen function
+with data and output the detections.
+
+:copyright:
+ EQcorrscan developers.
+
+:license:
+ GNU Lesser General Public License, Version 3
+ (https://www.gnu.org/copyleft/lesser.html)
+"""
+import os
+import pickle
+import logging
+import numpy as np
+
+from collections import defaultdict
+from typing import List, Set, Tuple
+from timeit import default_timer
+
+from concurrent.futures import ThreadPoolExecutor
+
+from obspy import Stream, UTCDateTime
+
+from eqcorrscan.core.match_filter.template import (
+ Template, group_templates_by_seedid)
+from eqcorrscan.core.match_filter.detection import Detection
+from eqcorrscan.core.match_filter.party import Party
+from eqcorrscan.core.match_filter.family import Family
+from eqcorrscan.core.match_filter.helpers import _spike_test, _mad
+from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
+
+from eqcorrscan.utils.correlate import (
+ get_stream_xcorr, _stabalised_fmf, fftw_multi_normxcorr,
+ _zero_invalid_correlation_sums, _set_inner_outer_threading)
+from eqcorrscan.utils.pre_processing import (
+ _check_daylong, _group_process)
+from eqcorrscan.utils.findpeaks import multi_find_peaks
+from eqcorrscan.utils.plotting import _match_filter_plot
+
+Logger = logging.getLogger(__name__)
+
+
+def _wildcard_fill(
+ net: str, sta: str, loc: str, chan: str
+) -> Tuple[str, str, str, str]:
+ """
+    Convert None to wildcards. Cope with SEISAN channel naming.
+
+ .. rubric:: Example
+
+ >>> _wildcard_fill(None, None, None, None)
+ ('*', '*', '*', '*')
+ >>> _wildcard_fill("NZ", "FOZ", "10", "HZ")
+ ('NZ', 'FOZ', '10', 'H?Z')
+ """
+ if net in [None, '']:
+ net = "*"
+ if sta in [None, '']:
+ sta = "*"
+ if loc in [None, '']:
+ loc = "*"
+ if chan in [None, '']:
+ chan = "*"
+ # Cope with old seisan chans
+ if len(chan) == 2:
+ chan = f"{chan[0]}?{chan[-1]}"
+ return net, sta, loc, chan
+
+
+def _download_st(
+ starttime: UTCDateTime,
+ endtime: UTCDateTime,
+ buff: float,
+ min_gap: float,
+ template_channel_ids: List[tuple],
+ client,
+ retries: int
+) -> Stream:
+ """
+ Helper to download a stream from a client for a given start and end time.
+
+    Applies `buff` to extend download to (hopefully) ensure all data are
+ provided. Retries download up to `retries` times, and discards data
+ with large gaps.
+
+ :param starttime: Start time to download data from
+ :param endtime: End time to download data to
+ :param buff:
+ Length to pad downloaded data by - some clients do not provide all
+ data requested.
+ :param min_gap: See core.match_filter.tribe.client_detect
+    :param template_channel_ids:
+        List of (network, station, location, channel) tuples to download
+        data for.
+ :param client:
+ Client-like object with at least a .get_waveforms_bulk method.
+ :param retries: See core.match_filter.tribe.client_detect
+
+ :return: Stream as downloaded.
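+
+    .. rubric:: Example
+
+    A minimal sketch (not executed); the client, times and channel tuple
+    below are placeholders:
+
+    >>> from obspy.clients.fdsn import Client  # doctest: +SKIP
+    >>> st = _download_st(
+    ...     starttime=UTCDateTime(2023, 1, 1),
+    ...     endtime=UTCDateTime(2023, 1, 1, 1),
+    ...     buff=300.0, min_gap=10.0,
+    ...     template_channel_ids=[("NZ", "FOZ", "10", "HHZ")],
+    ...     client=Client("GEONET"), retries=3)  # doctest: +SKIP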
+ """
+ from obspy.clients.fdsn.header import FDSNException
+
+ bulk_info = []
+ for chan_id in template_channel_ids:
+ bulk_info.append((
+ chan_id[0], chan_id[1], chan_id[2], chan_id[3],
+ starttime - buff, endtime + buff))
+
+ for retry_attempt in range(retries):
+ try:
+ Logger.info(f"Downloading data between {starttime} and "
+ f"{endtime}")
+ st = client.get_waveforms_bulk(bulk_info)
+ Logger.info(
+ "Downloaded data for {0} traces".format(len(st)))
+ break
+ except FDSNException as e:
+ if "Split the request in smaller" in " ".join(e.args):
+ Logger.warning(
+ "Datacentre does not support large requests: "
+ "splitting request into smaller chunks")
+ st = Stream()
+ for _bulk in bulk_info:
+ try:
+ st += client.get_waveforms_bulk([_bulk])
+ except Exception as e:
+ Logger.error("No data for {0}".format(_bulk))
+ Logger.error(e)
+ continue
+ Logger.info("Downloaded data for {0} traces".format(
+ len(st)))
+ break
+ except Exception as e:
+ Logger.error(e)
+ continue
+ else:
+ raise MatchFilterError(
+ "Could not download data after {0} attempts".format(
+ retries))
+ # Get gaps and remove traces as necessary
+ if min_gap:
+ gaps = st.get_gaps(min_gap=min_gap)
+ if len(gaps) > 0:
+ Logger.warning("Large gaps in downloaded data")
+ st.merge()
+ gappy_channels = list(
+ set([(gap[0], gap[1], gap[2], gap[3])
+ for gap in gaps]))
+ _st = Stream()
+ for tr in st:
+ tr_stats = (tr.stats.network, tr.stats.station,
+ tr.stats.location, tr.stats.channel)
+ if tr_stats in gappy_channels:
+ Logger.warning(
+ "Removing gappy channel: {0}".format(tr))
+ else:
+ _st += tr
+ st = _st
+ st.split()
+ # Merge traces after gap checking
+ st = st.merge()
+ st.trim(starttime=starttime, endtime=endtime)
+
+ st_ids = [tr.id for tr in st]
+ # Remove traces that do not meet zero criteria
+ st.traces = [tr for tr in st if _check_daylong(tr.data)]
+ if len(st) < len(st_ids):
+ lost_ids = " ".join([tr_id for tr_id in st_ids
+ if tr_id not in [tr.id for tr in st]])
+ Logger.warning(
+ f"Removed data for {lost_ids} due to more zero datapoints "
+ f"than non-zero.")
+
+ st_ids = [tr.id for tr in st]
+ # Remove short traces
+ st.traces = [
+ tr for tr in st
+ if tr.stats.endtime - tr.stats.starttime > 0.8 * (endtime - starttime)]
+ if len(st) < len(st_ids):
+ lost_ids = " ".join([tr_id for tr_id in st_ids
+ if tr_id not in [tr.id for tr in st]])
+ Logger.warning(
+            f"Removed data for {lost_ids} due to being shorter than 80% "
+            f"of the required length.")
+
+ return st
+
+
+def _pre_process(
+ st: Stream,
+ template_ids: set,
+ pre_processed: bool,
+ filt_order: int,
+ highcut: float,
+ lowcut: float,
+ samp_rate: float,
+ process_length: float,
+ parallel: bool,
+ cores: int,
+ daylong: bool,
+ ignore_length: bool,
+ ignore_bad_data: bool,
+ overlap: float, **kwargs
+) -> List[Stream]:
+ """
+    Basic matched-filter processing flow. Data are processed in-place.
+
+ :param st: Stream to process
+ :param template_ids:
+ Iterable of seed ids in the template set. Only channels matching these
+ seed ids will be retained.
+ :param pre_processed: See core.match_filter.tribe.detect
+ :param filt_order: See utils.pre_processing.multi_process
+ :param highcut: See utils.pre_processing.multi_process
+ :param lowcut: See utils.pre_processing.multi_process
+ :param samp_rate: See utils.pre_processing.multi_process
+ :param process_length: See utils.pre_processing.multi_process
+ :param parallel: See utils.pre_processing.multi_process
+ :param cores: See utils.pre_processing.multi_process
+ :param daylong: See utils.pre_processing.multi_process
+ :param ignore_length: See utils.pre_processing.multi_process
+ :param overlap: See core.match_filter.tribe.detect
+ :param ignore_bad_data: See utils.pre_processing.multi_process
+
+    :return: List of processed stream chunks.
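+
+    .. rubric:: Example
+
+    A minimal sketch (not executed); the seed id and processing values are
+    placeholders:
+
+    >>> from obspy import read  # doctest: +SKIP
+    >>> chunks = _pre_process(
+    ...     st=read(), template_ids={"BW.RJOB..EHZ"},
+    ...     pre_processed=False, filt_order=4, highcut=10.0, lowcut=2.0,
+    ...     samp_rate=25.0, process_length=3600.0, parallel=False, cores=1,
+    ...     daylong=False, ignore_length=False, ignore_bad_data=False,
+    ...     overlap=0.0)  # doctest: +SKIP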
+ """
+    # Retain only channels that have matches in templates
+    Logger.info(f"Restricting stream to template seed ids: {template_ids}")
+    st = Stream([tr for tr in st if tr.id in template_ids])
+    Logger.info(f"Processing {len(st)} channels")
+ if len(st) == 0:
+ raise IndexError(
+ "No matching channels between stream and templates")
+ tic = default_timer()
+ _spike_test(st)
+ toc = default_timer()
+ Logger.info(f"Checking for spikes took {toc - tic:.4f} s")
+ if not pre_processed:
+ st_chunks = _group_process(
+ filt_order=filt_order,
+ highcut=highcut,
+ lowcut=lowcut,
+ samp_rate=samp_rate,
+ process_length=process_length,
+ parallel=parallel,
+ cores=cores,
+ stream=st,
+ daylong=daylong,
+ ignore_length=ignore_length,
+ overlap=overlap,
+ ignore_bad_data=ignore_bad_data)
+ else:
+ st_chunks = [st]
+ Logger.info(f"Stream has been split into {len(st_chunks)} chunks")
+ return st_chunks
+
+
+def _group(
+ sids: Set[str],
+ templates: List[Template],
+ group_size: int,
+ groups: List[List[str]] = None
+) -> List[List[Template]]:
+ """
+ Group templates either by seed id, or using pre-computed groups
+
+ :param sids: Seed IDs available in stream
+ :param templates: Templates to group
+ :param group_size: Maximum group size
+ :param groups: [Optional] List of List of template names in groups
+ :return: Groups of templates.
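+
+    .. rubric:: Example
+
+    A minimal sketch (not executed); `tribe` is a placeholder Tribe:
+
+    >>> template_groups = _group(
+    ...     sids={"NZ.FOZ.10.HHZ"}, templates=tribe.templates,
+    ...     group_size=20, groups=None)  # doctest: +SKIP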
+ """
+ Logger.info(f"Grouping for {sids}")
+ if groups:
+ Logger.info("Using pre-computed groups")
+ t_dict = {t.name: t for t in templates}
+ template_groups = []
+ for grp in groups:
+ template_group = [
+ t_dict.get(t_name) for t_name in grp
+                if t_name in t_dict]
+ if len(template_group):
+ template_groups.append(template_group)
+ return template_groups
+ template_groups = group_templates_by_seedid(
+ templates=templates,
+ st_seed_ids=sids,
+ group_size=group_size)
+ if len(template_groups) == 1 and len(template_groups[0]) == 0:
+ Logger.error("No matching ids between stream and templates")
+ raise IndexError("No matching ids between stream and templates")
+ return template_groups
+
+
+def _corr_and_peaks(
+ templates: List[Template],
+ template_names: List[str],
+ stream: Stream,
+ xcorr_func: str,
+ concurrency: str,
+ cores: int,
+ i: int,
+ export_cccsums: bool,
+ parallel: bool,
+ peak_cores: int,
+ threshold: float,
+ threshold_type: str,
+ trig_int: float,
+ sampling_rate: float,
+ full_peaks: bool,
+ plot: bool,
+ plotdir: str,
+ plot_format: str,
+ prepped: bool = False,
+ **kwargs
+):
+ """
+ Compute cross-correlation between templates and a stream. Returns peaks in
+ correlation function.
+
+ :param templates: Templates to correlate
+ :param template_names: Names of templates (ordered as templates)
+ :param stream: Stream to correlate templates with
+ :param xcorr_func: Cross-correlation function to use
+ :param concurrency: Concurrency of cross-correlation function
+ :param cores: Cores (threads) to use for cross-correlation
+ :param i: Group-id (internal book-keeping)
+ :param export_cccsums: Whether to export the raw cross-correlation sums
+ :param parallel: Whether to compute peaks in parallel
+ :param peak_cores: Number of cores (threads) to use for peak finding
+ :param threshold: Threshold value (user-defined)
+ :param threshold_type: Threshold type (e.g. MAD, ...)
+ :param trig_int: Trigger interval in seconds
+ :param sampling_rate: Sampling rate of data
+ :param full_peaks: Whether to compute full peaks, or fast peaks.
+ :param plot: Whether to plot correlation sums and peaks or not
+ :param plotdir: Where to save plots if made
+ :param plot_format: What format (extension) to use for plots.
+ :param prepped:
+ Whether data have already been prepared for correlation or not.
+        If prepped, `templates` and `stream` must already be the arrays or
+        dicts expected by the chosen xcorr function (see code below).
+
+ :return: Peaks, thresholds, number of channels, channels for each template
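+
+    .. rubric:: Example
+
+    A minimal sketch of the standard (non-prepped) path (not executed);
+    `tribe` and `st` are placeholders:
+
+    >>> peaks, thresholds, no_chans, chans = _corr_and_peaks(
+    ...     templates=[t.st for t in tribe],
+    ...     template_names=[t.name for t in tribe],
+    ...     stream=st, xcorr_func=None, concurrency=None, cores=1, i=0,
+    ...     export_cccsums=False, parallel=False, peak_cores=1,
+    ...     threshold=8.0, threshold_type="MAD", trig_int=6.0,
+    ...     sampling_rate=25.0, full_peaks=False, plot=False,
+    ...     plotdir=None, plot_format="png")  # doctest: +SKIP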
+ """
+ # Special cases for fmf and fftw to minimize reshaping time.
+ Logger.info(
+ f"Starting correlation run for template group {i}")
+ tic = default_timer()
+ if prepped and xcorr_func == "fmf":
+ assert isinstance(templates, np.ndarray)
+ assert isinstance(stream, np.ndarray)
+ # These need to be passed from queues.
+ pads = kwargs.get('pads')
+ weights = kwargs.get('weights')
+ chans = kwargs.get("chans")
+ no_chans = kwargs.get("no_chans")
+ # We do not care about removing the gain from our data, we copied it.
+ multipliers = np.ones((len(stream), 1))
+ step = 1 # We only implement single-step correlations
+ if concurrency in ("multithread", "multiprocess"):
+ arch = "cpu"
+ else:
+ arch = "gpu"
+ cccsums = _stabalised_fmf(
+ template_arr=templates, data_arr=stream, weights=weights,
+ pads=pads, arch=arch, multipliers=multipliers, step=step)
+ elif prepped and xcorr_func in ("fftw", None):
+ assert isinstance(templates, dict)
+ assert isinstance(stream, dict)
+ pads = kwargs.pop('pads')
+ seed_ids = kwargs.pop("seed_ids")
+ num_cores_inner, num_cores_outer = _set_inner_outer_threading(
+ kwargs.get('cores', None), kwargs.get("cores_outer", None),
+ len(stream))
+
+ cccsums, tr_chans = fftw_multi_normxcorr(
+ template_array=templates, stream_array=stream,
+ pad_array=pads, seed_ids=seed_ids, cores_inner=num_cores_inner,
+ cores_outer=num_cores_outer, stack=True, **kwargs)
+ n_templates = len(cccsums)
+ # Post processing
+ no_chans = np.sum(np.array(tr_chans).astype(int), axis=0)
+ chans = [[] for _i in range(n_templates)]
+ for seed_id, tr_chan in zip(seed_ids, tr_chans):
+ for chan, state in zip(chans, tr_chan):
+ if state:
+ chan.append(seed_id)
+ cccsums = _zero_invalid_correlation_sums(cccsums, pads, chans)
+ chans = [[(seed_id.split('.')[1], seed_id.split('.')[-1].split('_')[0])
+ for seed_id in _chans] for _chans in chans]
+ else:
+ # The default just uses stream xcorr funcs.
+ multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency)
+ cccsums, no_chans, chans = multichannel_normxcorr(
+ templates=templates, stream=stream, cores=cores, **kwargs
+ )
+ if len(cccsums[0]) == 0:
+ raise MatchFilterError(
+ f"Correlation has not run for group {i}, "
+ f"zero length cccsum")
+ toc = default_timer()
+ Logger.info(
+ f"Correlations for group {i} of {len(template_names)} "
+ f"templates took {toc - tic:.4f} s")
+ Logger.debug(
+ f"The shape of the returned cccsums in group {i} "
+ f"is: {cccsums.shape}")
+ Logger.debug(
+ f'This is from {len(templates)} templates correlated with '
+ f'{len(stream)} channels of data in group {i}')
+
+ # Handle saving correlation stats
+ if export_cccsums:
+ for i, cccsum in enumerate(cccsums):
+ fname = (
+ f"{template_names[i]}-{stream[0].stats.starttime}-"
+ f"{stream[0].stats.endtime}_cccsum.npy")
+ np.save(file=fname, arr=cccsum)
+ Logger.info(
+ f"Saved correlation statistic to {fname}")
+
+ # Zero mean check
+ if np.any(np.abs(cccsums.mean(axis=-1)) > 0.05):
+ Logger.warning(
+ 'Mean of correlations is non-zero! Check this!')
+ if parallel:
+ Logger.info(f"Finding peaks using {peak_cores} threads")
+ else:
+ Logger.info("Finding peaks in serial")
+ # This is in the main process because transferring
+ # lots of large correlation sums in queues is very slow
+ all_peaks, thresholds = _threshold(
+ cccsums=cccsums, no_chans=no_chans,
+ template_names=template_names, threshold=threshold,
+ threshold_type=threshold_type,
+ trig_int=int(trig_int * sampling_rate),
+ parallel=parallel, full_peaks=full_peaks,
+ peak_cores=peak_cores, plot=plot, stream=stream,
+ plotdir=plotdir, plot_format=plot_format)
+ return all_peaks, thresholds, no_chans, chans
+
+
+def _threshold(
+ cccsums: np.ndarray,
+ no_chans: list,
+ template_names: list,
+ threshold: float,
+ threshold_type: str,
+ trig_int: int, # converted to samples before getting to this func.
+ parallel: bool,
+ full_peaks: bool,
+ peak_cores: int,
+ plot: bool,
+ stream: Stream,
+ plotdir: str,
+ plot_format: str,
+):
+ """
+ Find peaks within correlation functions for given thresholds.
+
+ :param cccsums: Numpy array of correlations [templates x samples]
+ :param no_chans:
+ Number of channels for each correlation (ordered as cccsums)
+ :param template_names:
+ Template names for each correlation (ordered as cccsums)
+ :param threshold: Input threshold value
+ :param threshold_type: Input threshold type (e.g. MAD, ...)
+ :param trig_int: Trigger interval in SAMPLES.
+ :param parallel: Whether to compute peaks in parallel
+ :param full_peaks: Whether to compute full peaks or not
+ :param peak_cores: Number of cores (threads) to use for peak finding.
+ :param plot: Whether to plot the peak finding
+ :param stream: Stream for plotting (not needed otherwise)
+ :param plotdir: Directory to write plots to
+ :param plot_format: Format to save plots in
+
+ :return: (all peaks, used thresholds)
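+
+    .. rubric:: Example
+
+    A minimal sketch of how MAD-type thresholds are formed, using plain
+    numpy rather than the internal `_mad` helper:
+
+    >>> import numpy as np
+    >>> cccsums = np.array([[0.1, -0.2, 0.4], [0.05, 0.3, -0.1]])
+    >>> mad_thresholds = [8 * np.median(np.abs(c)) for c in cccsums]
+    >>> [float(round(t, 2)) for t in mad_thresholds]
+    [1.6, 0.8]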
+ """
+ Logger.debug(f"Got cccsums shaped {cccsums.shape}")
+ Logger.debug(f"From {len(template_names)} templates")
+
+ tic = default_timer()
+ if str(threshold_type) == str("absolute"):
+ thresholds = [threshold for _ in range(len(cccsums))]
+ elif str(threshold_type) == str('MAD'):
+ median_cores = min([peak_cores, len(cccsums)])
+ if cccsums.size < 2e7: # parallelism not worth it
+ median_cores = 1
+ with ThreadPoolExecutor(max_workers=median_cores) as executor:
+ # Because numpy releases GIL threading can use
+ # multiple cores
+ medians = executor.map(_mad, cccsums,
+ chunksize=len(cccsums) // median_cores)
+ thresholds = [threshold * median for median in medians]
+ else:
+ thresholds = [threshold * no_chans[i]
+ for i in range(len(cccsums))]
+ toc = default_timer()
+ Logger.info(f"Computing thresholds took {toc - tic: .4f} s")
+ outtic = default_timer()
+ all_peaks = multi_find_peaks(
+ arr=cccsums, thresh=thresholds, parallel=parallel,
+ trig_int=trig_int, full_peaks=full_peaks, cores=peak_cores)
+ outtoc = default_timer()
+ Logger.info(f"Finding peaks for group took {outtoc - outtic:.4f}s")
+
+ # Plotting
+ if plot and stream:
+ for i, cccsum in enumerate(cccsums):
+ _match_filter_plot(
+ stream=stream, cccsum=cccsum,
+ template_names=template_names,
+ rawthresh=thresholds[i], plotdir=plotdir,
+ plot_format=plot_format, i=i)
+    elif plot:
+        Logger.error("Plotting enabled but no stream found to plot")
+
+ return all_peaks, thresholds
+
+
+def _detect(
+ template_names: List[str],
+ all_peaks: np.ndarray,
+ starttime: UTCDateTime,
+ delta: float,
+ no_chans: List[int],
+ chans: List[List[str]],
+ thresholds: List[float]
+) -> List[Detection]:
+ """
+ Convert peaks to Detection objects
+
+    :param template_names: List of template names
+    :param all_peaks: Array of peaks ordered as template_names
+ :param starttime: Starttime for peak index relative time
+ :param delta: Sample interval to convert peaks from samples to time
+ :param no_chans: Number of channels used (ordered as template_names)
+ :param chans: Channels used (ordered as template_names)
+ :param thresholds: Thresholds used (ordered as template_names)
+
+ :return: List of detections.
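+
+    .. rubric:: Example
+
+    A detection time is the chunk starttime plus the peak sample index
+    multiplied by the sample interval:
+
+    >>> from obspy import UTCDateTime
+    >>> starttime, delta = UTCDateTime(2023, 1, 1), 0.04
+    >>> peak = (0.65, 250)  # (detect_val, sample index)
+    >>> print(starttime + peak[1] * delta)
+    2023-01-01T00:00:10.000000Z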
+ """
+ tic = default_timer()
+ detections = []
+ for i, template_name in enumerate(template_names):
+ if not all_peaks[i]:
+ Logger.debug(f"Found 0 peaks for template {template_name}")
+ continue
+ Logger.debug(f"Found {len(all_peaks[i])} detections "
+ f"for template {template_name}")
+ for peak in all_peaks[i]:
+ detecttime = starttime + (peak[1] * delta)
+ if peak[0] > no_chans[i]:
+                Logger.error(f"Correlation sum {peak[0]} exceeds "
+                             f"bounds ({no_chans[i]})")
+ detection = Detection(
+ template_name=template_name, detect_time=detecttime,
+ no_chans=no_chans[i], detect_val=peak[0],
+ threshold=thresholds[i], typeofdet='corr',
+ chans=chans[i],
+ threshold_type=None,
+ # Update threshold_type and threshold outside of this func.
+ threshold_input=None)
+ detections.append(detection)
+ toc = default_timer()
+ Logger.info(f"Forming detections took {toc - tic:.4f} s")
+ return detections
+
+
+def _load_template(t_file: str) -> Template:
+ """ Load a pickled template from a file """
+ try:
+ with open(t_file, "rb") as f:
+ t = pickle.load(f)
+ except Exception as e:
+ Logger.warning(f"Could not read template from {t_file} due to {e}")
+ return None
+ assert isinstance(t, Template), "Loaded object is not a Template, aborting"
+ return t
+
+
+def _read_template_db(template_file_dict: dict) -> List[Template]:
+ """
+ Read templates from files on disk.
+
+ :param template_file_dict: Template file names keyed by template name
+
+ :returns: list of templates
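+
+    .. rubric:: Example
+
+    A minimal sketch (not executed); the file path is a placeholder:
+
+    >>> templates = _read_template_db(
+    ...     {"template_1": "templates/template_1.pkl"})  # doctest: +SKIP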
+ """
+ with ThreadPoolExecutor() as executor:
+ templates = executor.map(_load_template, template_file_dict.values())
+ templates = [t for t in templates if t]
+ Logger.info(f"Deserialized {len(templates)} templates")
+ if len(templates) < len(template_file_dict):
+ Logger.warning(f"Expected {len(template_file_dict)} templates, "
+ f"but found {len(templates)}")
+ return templates
+
+
+def _make_party(
+ detections: List[Detection],
+ threshold: float,
+ threshold_type: str,
+ templates: List[Template],
+ chunk_start: UTCDateTime,
+ chunk_id: int,
+ save_progress: bool
+) -> str:
+ """
+ Construct a Party from Detections.
+
+ :param detections: List of detections
+ :param threshold: Input threshold
+ :param threshold_type: Input threshold type
+ :param templates: Templates used in detections
+ :param chunk_start: Starttime of party epoch
+ :param chunk_id: Internal index for party epoch
+ :param save_progress: Whether to save progress or not
+
+ :return: The filename the party has been pickled to.
+ """
+ chunk_dir = os.path.join(
+ ".parties", "{chunk_start.year}", "{chunk_start.julday:03d}")
+ chunk_file_str = os.path.join(
+ chunk_dir, "chunk_party_{chunk_start_str}_{chunk_id}_{pid}.pkl")
+ # Process ID included in chunk file to avoid multiple processes writing
+ # and reading and removing the same files.
+
+ # Get the results out of the end!
+ Logger.info(f"Made {len(detections)} detections")
+
+ # post - add in threshold, threshold_type to all detections
+ Logger.info("Adding threshold to detections")
+ for detection in detections:
+ detection.threshold_input = threshold
+ detection.threshold_type = threshold_type
+
+ # Select detections very quickly: detection order does not
+ # change, make dict of keys: template-names and values:
+ # list of indices and use indices to select
+ Logger.info("Making dict of detections")
+ detection_idx_dict = defaultdict(list)
+ for n, detection in enumerate(detections):
+ detection_idx_dict[detection.template_name].append(n)
+
+ # Convert to Families and build party.
+ Logger.info("Converting to party and making events")
+ chunk_party = Party()
+
+ # Make a dictionary of templates keyed by name - we could be passed a dict
+ # of pickled templates
+ if not isinstance(templates, dict):
+ templates = {t.name: t for t in templates}
+
+ for t_name, template in templates.items():
+ family_detections = [
+ detections[idx]
+ for idx in detection_idx_dict[t_name]]
+ # Make party sparse - only write out families with detections
+ if len(family_detections):
+ if not isinstance(template, Template):
+ # Try and read this from disk
+ with open(template, "rb") as f:
+ template = pickle.load(f)
+ for d in family_detections:
+ d._calculate_event(template=template)
+ family = Family(
+ template=template, detections=family_detections)
+ chunk_party += family
+
+ Logger.info("Pickling party")
+ if not os.path.isdir(chunk_dir.format(chunk_start=chunk_start)):
+ os.makedirs(chunk_dir.format(chunk_start=chunk_start))
+
+ chunk_file = chunk_file_str.format(
+ chunk_start_str=chunk_start.strftime("%Y-%m-%dT%H-%M-%S"),
+ chunk_start=chunk_start,
+ chunk_id=chunk_id, pid=os.getpid())
+ with open(chunk_file, "wb") as _f:
+ pickle.dump(chunk_party, _f)
+ Logger.info("Completed party processing")
+
+ if save_progress:
+ Logger.info(f"Written chunk to {chunk_file}")
+ return chunk_file
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
diff --git a/eqcorrscan/core/match_filter/matched_filter.py b/eqcorrscan/core/match_filter/matched_filter.py
index 7c6c50dd9..b40d2fa63 100644
--- a/eqcorrscan/core/match_filter/matched_filter.py
+++ b/eqcorrscan/core/match_filter/matched_filter.py
@@ -12,18 +12,11 @@
(https://www.gnu.org/copyleft/lesser.html)
"""
import logging
-from timeit import default_timer
import numpy as np
-from obspy import Catalog, UTCDateTime, Stream
+from obspy import Stream
-from eqcorrscan.core.match_filter.helpers import (
- _spike_test, extract_from_stream)
-
-from eqcorrscan.utils.correlate import get_stream_xcorr
-from eqcorrscan.utils.findpeaks import multi_find_peaks
-from eqcorrscan.utils.pre_processing import (
- dayproc, shortproc, _prep_data_for_correlation)
+from eqcorrscan.core.match_filter.helpers import extract_from_stream
Logger = logging.getLogger(__name__)
@@ -65,330 +58,13 @@ def __str__(self):
return self.value
-def _group_detect(templates, stream, threshold, threshold_type, trig_int,
- plot=False, plotdir=None, group_size=None,
- pre_processed=False, daylong=False, parallel_process=True,
- xcorr_func=None, concurrency=None, cores=None,
- ignore_length=False, ignore_bad_data=False,
- overlap="calculate", full_peaks=False, process_cores=None,
- **kwargs):
- """
- Pre-process and compute detections for a group of templates.
-
- Will process the stream object, so if running in a loop, you will want
- to copy the stream before passing it to this function.
-
- :type templates: list
- :param templates: List of :class:`eqcorrscan.core.match_filter.Template`
- :type stream: `obspy.core.stream.Stream`
- :param stream: Continuous data to detect within using the Template.
- :type threshold: float
- :param threshold:
- Threshold level, if using `threshold_type='MAD'` then this will be
- the multiple of the median absolute deviation.
- :type threshold_type: str
- :param threshold_type:
- The type of threshold to be used, can be MAD, absolute or
- av_chan_corr. See Note on thresholding below.
- :type trig_int: float
- :param trig_int:
- Minimum gap between detections from one template in seconds.
- If multiple detections occur within trig_int of one-another, the one
- with the highest cross-correlation sum will be selected.
- :type plot: bool
- :param plot:
- Turn plotting on or off.
- :type plotdir: str
- :param plotdir:
- The path to save plots to. If `plotdir=None` (default) then the
- figure will be shown on screen.
- :type group_size: int
- :param group_size:
- Maximum number of templates to run at once, use to reduce memory
- consumption, if unset will use all templates.
- :type pre_processed: bool
- :param pre_processed:
- Set to True if `stream` has already undergone processing, in this
- case eqcorrscan will only check that the sampling rate is correct.
- Defaults to False, which will use the
- :mod:`eqcorrscan.utils.pre_processing` routines to resample and
- filter the continuous data.
- :type daylong: bool
- :param daylong:
- Set to True to use the
- :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
- preforms additional checks and is more efficient for day-long data
- over other methods.
- :type parallel_process: bool
- :param parallel_process:
- :type xcorr_func: str or callable
- :param xcorr_func:
- A str of a registered xcorr function or a callable for implementing
- a custom xcorr function. For more details see:
- :func:`eqcorrscan.utils.correlate.register_array_xcorr`
- :type concurrency: str
- :param concurrency:
- The type of concurrency to apply to the xcorr function. Options are
- 'multithread', 'multiprocess', 'concurrent'. For more details see
- :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
- :type cores: int
- :param cores: Number of workers for processing and correlation.
- :type ignore_length: bool
- :param ignore_length:
- If using daylong=True, then dayproc will try check that the data
- are there for at least 80% of the day, if you don't want this check
- (which will raise an error if too much data are missing) then set
- ignore_length=True. This is not recommended!
- :type overlap: float
- :param overlap:
- Either None, "calculate" or a float of number of seconds to
- overlap detection streams by. This is to counter the effects of
- the delay-and-stack in calculating cross-correlation sums. Setting
- overlap = "calculate" will work out the appropriate overlap based
- on the maximum lags within templates.
- :type full_peaks: bool
- :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks_compiled`
- :type process_cores: int
- :param process_cores:
- Number of processes to use for pre-processing (if different to
- `cores`).
-
- :return:
- :class:`eqcorrscan.core.match_filter.Party` of families of detections.
- """
- from eqcorrscan.core.match_filter.party import Party
- from eqcorrscan.core.match_filter.family import Family
-
- master = templates[0]
- peak_cores = kwargs.get('peak_cores', process_cores)
- kwargs.update(dict(peak_cores=peak_cores))
- # Check that they are all processed the same.
- lap = 0.0
- for template in templates:
- starts = [t.stats.starttime for t in template.st.sort(['starttime'])]
- if starts[-1] - starts[0] > lap:
- lap = starts[-1] - starts[0]
- if not template.same_processing(master):
- raise MatchFilterError('Templates must be processed the same.')
- if overlap is None:
- overlap = 0.0
- elif not isinstance(overlap, float) and str(overlap) == str("calculate"):
- overlap = lap
- elif not isinstance(overlap, float):
- raise NotImplementedError(
- "%s is not a recognised overlap type" % str(overlap))
- if overlap >= master.process_length:
- Logger.warning(
- f"Overlap of {overlap} s is greater than process "
- f"length ({master.process_length} s), ignoring overlap")
- overlap = 0
- if not pre_processed:
- if process_cores is None:
- process_cores = cores
- streams = _group_process(
- template_group=templates, parallel=parallel_process,
- cores=process_cores, stream=stream, daylong=daylong,
- ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
- overlap=overlap)
- for _st in streams:
- Logger.debug(f"Processed stream:\n{_st.__str__(extended=True)}")
- else:
- Logger.warning('Not performing any processing on the continuous data.')
- streams = [stream]
- detections = []
- party = Party()
- if group_size is not None:
- n_groups = int(len(templates) / group_size)
- if n_groups * group_size < len(templates):
- n_groups += 1
- else:
- n_groups = 1
- kwargs.update({'peak_cores': kwargs.get('peak_cores', process_cores)})
- for st_chunk in streams:
- chunk_start, chunk_end = (min(tr.stats.starttime for tr in st_chunk),
- max(tr.stats.endtime for tr in st_chunk))
- Logger.info(
- f'Computing detections between {chunk_start} and {chunk_end}')
- st_chunk.trim(starttime=chunk_start, endtime=chunk_end)
- for tr in st_chunk:
- if len(tr) > len(st_chunk[0]):
- tr.data = tr.data[0:len(st_chunk[0])]
- for i in range(n_groups):
- if group_size is not None:
- end_group = (i + 1) * group_size
- start_group = i * group_size
- if i == n_groups:
- end_group = len(templates)
- else:
- end_group = len(templates)
- start_group = 0
- template_group = [t for t in templates[start_group: end_group]]
- detections += match_filter(
- template_names=[t.name for t in template_group],
- template_list=[t.st for t in template_group], st=st_chunk,
- xcorr_func=xcorr_func, concurrency=concurrency,
- threshold=threshold, threshold_type=threshold_type,
- trig_int=trig_int, plot=plot, plotdir=plotdir, cores=cores,
- full_peaks=full_peaks, **kwargs)
- for template in template_group:
- family = Family(template=template, detections=[])
- for detection in detections:
- if detection.template_name == template.name:
- for pick in detection.event.picks:
- pick.time += template.prepick
- for origin in detection.event.origins:
- origin.time += template.prepick
- family.detections.append(detection)
- party += family
- return party
-
-
-def _group_process(template_group, parallel, cores, stream, daylong,
- ignore_length, ignore_bad_data, overlap):
- """
- Process data into chunks based on template processing length.
-
- Templates in template_group must all have the same processing parameters.
-
- :type template_group: list
- :param template_group: List of Templates.
- :type parallel: bool
- :param parallel: Whether to use parallel processing or not
- :type cores: int
- :param cores: Number of cores to use, can be False to use all available.
- :type stream: :class:`obspy.core.stream.Stream`
- :param stream: Stream to process, will be left intact.
- :type daylong: bool
- :param daylong: Whether to enforce day-length files or not.
- :type ignore_length: bool
- :param ignore_length:
- If using daylong=True, then dayproc will try check that the data
- are there for at least 80% of the day, if you don't want this check
- (which will raise an error if too much data are missing) then set
- ignore_length=True. This is not recommended!
- :type ignore_bad_data: bool
- :param ignore_bad_data:
- If False (default), errors will be raised if data are excessively
- gappy or are mostly zeros. If True then no error will be raised, but
- an empty trace will be returned.
- :type overlap: float
- :param overlap: Number of seconds to overlap chunks by.
-
- :return: list of processed streams.
- """
- master = template_group[0]
- processed_streams = []
- kwargs = {
- 'filt_order': master.filt_order,
- 'highcut': master.highcut, 'lowcut': master.lowcut,
- 'samp_rate': master.samp_rate, 'parallel': parallel,
- 'num_cores': cores, 'ignore_length': ignore_length,
- 'ignore_bad_data': ignore_bad_data}
- # Processing always needs to be run to account for gaps - pre-process will
- # check whether filtering and resampling needs to be done.
- process_length = master.process_length
- if daylong:
- if not master.process_length == 86400:
- Logger.warning(
- 'Processing day-long data, but template was cut from %i s long'
- ' data, will reduce correlations' % master.process_length)
- func = dayproc
- process_length = 86400
- # Check that data all start on the same day, otherwise strange
- # things will happen...
- starttimes = [tr.stats.starttime.date for tr in stream]
- if not len(list(set(starttimes))) == 1:
- Logger.warning('Data start on different days, setting to last day')
- starttime = UTCDateTime(
- stream.sort(['starttime'])[-1].stats.starttime.date)
- else:
- starttime = stream.sort(['starttime'])[0].stats.starttime
- else:
- # We want to use shortproc to allow overlaps
- func = shortproc
- starttime = stream.sort(['starttime'])[0].stats.starttime
- endtime = stream.sort(['endtime'])[-1].stats.endtime
- data_len_samps = round((endtime - starttime) * master.samp_rate) + 1
- assert overlap < process_length, "Overlap must be less than process length"
- chunk_len_samps = (process_length - overlap) * master.samp_rate
- n_chunks = int(data_len_samps // chunk_len_samps)
- Logger.info(f"Splitting these data in {n_chunks} chunks")
- if n_chunks == 0:
- Logger.error('Data must be process_length or longer, not computing')
- _endtime = starttime
- for i in range(n_chunks):
- kwargs.update(
- {'starttime': starttime + (i * (process_length - overlap))})
- if not daylong:
- _endtime = kwargs['starttime'] + process_length
- kwargs.update({'endtime': _endtime})
- else:
- _endtime = kwargs['starttime'] + 86400
- chunk_stream = stream.slice(starttime=kwargs['starttime'],
- endtime=_endtime).copy()
- Logger.debug(f"Processing chunk {i} between {kwargs['starttime']} "
- f"and {_endtime}")
- if len(chunk_stream) == 0:
- Logger.warning(
- f"No data between {kwargs['starttime']} and {_endtime}")
- continue
- for tr in chunk_stream:
- tr.data = tr.data[0:int(
- process_length * tr.stats.sampling_rate)]
- _chunk_stream_lengths = {
- tr.id: tr.stats.endtime - tr.stats.starttime
- for tr in chunk_stream}
- for tr_id, chunk_length in _chunk_stream_lengths.items():
- # Remove traces that are too short.
- if not ignore_length and chunk_length <= .8 * process_length:
- tr = chunk_stream.select(id=tr_id)[0]
- chunk_stream.remove(tr)
- Logger.warning(
- "Data chunk on {0} starting {1} and ending {2} is "
- "below 80% of the requested length, will not use"
- " this.".format(
- tr.id, tr.stats.starttime, tr.stats.endtime))
- if len(chunk_stream) > 0:
- Logger.debug(
- f"Processing chunk:\n{chunk_stream.__str__(extended=True)}")
- _processed_stream = func(st=chunk_stream, **kwargs)
- # If data have more zeros then pre-processing will return a
- # trace of 0 length
- _processed_stream.traces = [
- tr for tr in _processed_stream if tr.stats.npts != 0]
- if len(_processed_stream) == 0:
- Logger.warning(
- f"Data quality insufficient between {kwargs['starttime']}"
- f" and {_endtime}")
- continue
- # Pre-procesing does additional checks for zeros - we need to check
- # again whether we actually have something useful from this.
- processed_chunk_stream_lengths = [
- tr.stats.endtime - tr.stats.starttime
- for tr in _processed_stream]
- if min(processed_chunk_stream_lengths) >= .8 * process_length:
- processed_streams.append(_processed_stream)
- else:
- Logger.warning(
- f"Data quality insufficient between {kwargs['starttime']}"
- f" and {_endtime}")
- continue
-
- if _endtime < stream[0].stats.endtime:
- Logger.warning(
- "Last bit of data between {0} and {1} will go unused "
- "because it is shorter than a chunk of {2} s".format(
- _endtime, stream[0].stats.endtime, process_length))
- return processed_streams
-
-
+# Note: maintained for backwards compatibility; all effort is now in Tribe.
def match_filter(template_names, template_list, st, threshold,
threshold_type, trig_int, plot=False, plotdir=None,
xcorr_func=None, concurrency=None, cores=None,
- plot_format='png', output_cat=False, output_event=True,
+ plot_format='png', output_cat=False,
extract_detections=False, arg_check=True, full_peaks=False,
- peak_cores=None, spike_test=True, copy_data=True,
- export_cccsums=False, **kwargs):
+ peak_cores=None, export_cccsums=False, **kwargs):
"""
Main matched-filter detection function.
@@ -577,41 +253,8 @@ def match_filter(template_names, template_list, st, threshold,
each template. For example, if a template trace starts 0.1 seconds
before the actual arrival of that phase, then the pick time generated
by match_filter for that phase will be 0.1 seconds early.
-
- .. Note::
- xcorr_func can be used as follows:
-
- .. rubric::xcorr_func argument example
-
- >>> import obspy
- >>> import numpy as np
- >>> from eqcorrscan.core.match_filter.matched_filter import (
- ... match_filter)
- >>> from eqcorrscan.utils.correlate import time_multi_normxcorr
- >>> # define a custom xcorr function
- >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs):
- ... # Just to keep example short call other xcorr function
- ... # in practice you would define your own function here
- ... print('calling custom xcorr function')
- ... return time_multi_normxcorr(templates, stream, pads)
- >>> # generate some toy templates and stream
- >>> random = np.random.RandomState(42)
- >>> template = obspy.read()
- >>> stream = obspy.read()
- >>> for num, tr in enumerate(stream): # iter st and embed templates
- ... data = tr.data
- ... tr.data = random.randn(6000) * 5
- ... tr.data[100: 100 + len(data)] = data
- >>> # call match_filter ane ensure the custom function is used
- >>> detections = match_filter(
- ... template_names=['1'], template_list=[template], st=stream,
- ... threshold=.5, threshold_type='absolute', trig_int=1,
- ... plotvar=False,
- ... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS
- calling custom xcorr function...
"""
- from eqcorrscan.core.match_filter.detection import Detection
- from eqcorrscan.utils.plotting import _match_filter_plot
+ from eqcorrscan.core.match_filter import Tribe, Template
if "plotvar" in kwargs.keys():
Logger.warning("plotvar is depreciated, use plot instead")
@@ -654,115 +297,50 @@ def match_filter(template_names, template_list, st, threshold,
if isinstance(tr.data, np.ma.core.MaskedArray):
raise MatchFilterError(
'Template contains masked array, split first')
- if spike_test:
- Logger.info("Checking for spikes in data")
- _spike_test(st)
- if cores is not None:
- parallel = True
- else:
- parallel = False
- if peak_cores is None:
- peak_cores = cores
- if copy_data:
- # Copy the stream here because we will muck about with it
- Logger.info("Copying data to keep your input safe")
- stream = st.copy()
- templates = [t.copy() for t in template_list]
- _template_names = template_names.copy() # This can be a shallow copy
- else:
- stream, templates, _template_names = st, template_list, template_names
-
- Logger.info("Reshaping templates")
- stream, templates, _template_names = _prep_data_for_correlation(
- stream=stream, templates=templates, template_names=_template_names)
- if len(templates) == 0:
- raise IndexError("No matching data")
- Logger.info('Starting the correlation run for these data')
- for template in templates:
- Logger.debug(template.__str__())
- Logger.debug(stream.__str__())
- multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency)
- outtic = default_timer()
- [cccsums, no_chans, chans] = multichannel_normxcorr(
- templates=templates, stream=stream, cores=cores, **kwargs)
- if len(cccsums[0]) == 0:
- raise MatchFilterError('Correlation has not run, zero length cccsum')
- outtoc = default_timer()
- Logger.info('Looping over templates and streams took: {0:.4f}s'.format(
- outtoc - outtic))
- Logger.debug(
- 'The shape of the returned cccsums is: {0}'.format(cccsums.shape))
- Logger.debug(
- 'This is from {0} templates correlated with {1} channels of '
- 'data'.format(len(templates), len(stream)))
- detections = []
- if output_cat:
- det_cat = Catalog()
- if str(threshold_type) == str("absolute"):
- thresholds = [threshold for _ in range(len(cccsums))]
- elif str(threshold_type) == str('MAD'):
- thresholds = [threshold * np.median(np.abs(cccsum))
- for cccsum in cccsums]
- else:
- thresholds = [threshold * no_chans[i] for i in range(len(cccsums))]
- if peak_cores is None:
- peak_cores = cores
- outtic = default_timer()
- all_peaks = multi_find_peaks(
- arr=cccsums, thresh=thresholds, parallel=parallel,
- trig_int=int(trig_int * stream[0].stats.sampling_rate),
- full_peaks=full_peaks, cores=peak_cores)
- outtoc = default_timer()
- Logger.info("Finding peaks took {0:.4f}s".format(outtoc - outtic))
- for i, cccsum in enumerate(cccsums):
- if export_cccsums:
- fname = (f"{_template_names[i]}-{stream[0].stats.starttime}-"
- f"{stream[0].stats.endtime}_cccsum.npy")
- np.save(file=fname, arr=cccsum)
- Logger.info(f"Saved correlation statistic to {fname}")
- if np.abs(np.mean(cccsum)) > 0.05:
- Logger.warning('Mean is not zero! Check this!')
- # Set up a trace object for the cccsum as this is easier to plot and
- # maintains timing
- if plot:
- _match_filter_plot(
- stream=stream, cccsum=cccsum, template_names=_template_names,
- rawthresh=thresholds[i], plotdir=plotdir,
- plot_format=plot_format, i=i)
- if all_peaks[i]:
- Logger.debug("Found {0} peaks for template {1}".format(
- len(all_peaks[i]), _template_names[i]))
- for peak in all_peaks[i]:
- detecttime = (
- stream[0].stats.starttime +
- peak[1] / stream[0].stats.sampling_rate)
- detection = Detection(
- template_name=_template_names[i], detect_time=detecttime,
- no_chans=no_chans[i], detect_val=peak[0],
- threshold=thresholds[i], typeofdet='corr', chans=chans[i],
- threshold_type=threshold_type, threshold_input=threshold)
- if output_cat or output_event:
- detection._calculate_event(template_st=templates[i])
- detections.append(detection)
- if output_cat:
- det_cat.append(detection.event)
- else:
- Logger.debug("Found 0 peaks for template {0}".format(
- _template_names[i]))
+
+ # Make a tribe and run tribe.detect
+ tribe = Tribe()
+ # Cope with naming issues
+ name_mapper = {template_name: f"template_{i}"
+ for i, template_name in enumerate(template_names)}
+ for template, template_name in zip(template_list, template_names):
+ tribe += Template(
+ st=template, name=name_mapper[template_name],
+ process_length=(st[0].stats.endtime - st[0].stats.starttime),
+ prepick=0.0, samp_rate=template[0].stats.sampling_rate,
+ )
+
+ # Data must be pre-processed
+ party = tribe.detect(
+ stream=st, threshold=threshold, threshold_type=threshold_type,
+ trig_int=trig_int, plot=plot, plotdir=plotdir, daylong=False,
+ parallel_process=False, xcorr_func=xcorr_func, concurrency=concurrency,
+ cores=cores, ignore_length=True, ignore_bad_data=True, group_size=None,
+ overlap="calculate", full_peaks=full_peaks, save_progress=False,
+ process_cores=None, pre_processed=True, check_processing=False,
+ return_stream=False, plot_format=plot_format,
+ peak_cores=peak_cores, export_cccsums=export_cccsums, **kwargs
+ )
+ detections = [d for f in party for d in f]
+
+ # Remap template names
+ name_mapper = {val: key for key, val in name_mapper.items()}
+ for d in detections:
+ d.template_name = name_mapper[d.template_name]
+
Logger.info("Made {0} detections from {1} templates".format(
- len(detections), len(templates)))
+ len(detections), len(tribe)))
if extract_detections:
- detection_streams = extract_from_stream(stream, detections)
- del stream, templates
+ detection_streams = extract_from_stream(st, detections)
if output_cat and not extract_detections:
- return detections, det_cat
+ return detections, party.get_catalog()
elif not extract_detections:
return detections
elif extract_detections and not output_cat:
return detections, detection_streams
else:
- return detections, det_cat, detection_streams
+ return detections, party.get_catalog(), detection_streams
if __name__ == "__main__":
diff --git a/eqcorrscan/core/match_filter/party.py b/eqcorrscan/core/match_filter/party.py
index 3afb8041b..c4e45407d 100644
--- a/eqcorrscan/core/match_filter/party.py
+++ b/eqcorrscan/core/match_filter/party.py
@@ -15,6 +15,7 @@
import glob
import os
import shutil
+import pickle
import tarfile
import tempfile
import logging
@@ -93,12 +94,13 @@ def __iadd__(self, other):
raise NotImplementedError(
'Ambiguous add, only allowed Party or Family additions.')
for oth_fam in families:
- added = False
- for fam in self.families:
- if fam.template == oth_fam.template:
- fam += oth_fam
- added = True
- if not added:
+ fam = self.select(oth_fam.template.name)
+ # This check is taken care of by Family.__iadd__
+ # assert fam.template == oth_fam.template, (
+ # "Matching template names, but different templates")
+ if fam is not None:
+ fam += oth_fam
+ else:
self.families.append(oth_fam)
return self
@@ -294,6 +296,10 @@ def __len__(self):
length += len(family)
return length
+ @property
+ def _template_dict(self):
+ return {family.template.name: family for family in self}
+
def select(self, template_name):
"""
Select a specific family from the party.
@@ -302,8 +308,7 @@ def select(self, template_name):
:param template_name: Template name of Family to select from a party.
:returns: Family
"""
- return [fam for fam in self.families
- if fam.template.name == template_name][0]
+ return self._template_dict.get(template_name)
def sort(self):
"""
@@ -490,7 +495,7 @@ def rethreshold(self, new_threshold, new_threshold_type='MAD',
def decluster(self, trig_int, timing='detect', metric='avg_cor',
hypocentral_separation=None, min_chans=0,
- absolute_values=False):
+ absolute_values=False, num_threads=None):
"""
De-cluster a Party of detections by enforcing a detection separation.
@@ -527,6 +532,10 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor',
:param absolute_values:
Use the absolute value of the metric to choose the preferred
detection.
+ :type num_threads: int
+ :param num_threads:
+ Number of threads to use for internal c-funcs if available.
+ Only valid if hypocentral_separation used.
.. Warning::
Works in place on object, if you need to keep the original safe
@@ -614,7 +623,8 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor',
peaks_out = decluster_distance_time(
peaks=detect_vals, index=detect_times,
trig_int=trig_int * 10 ** 6, catalog=catalog,
- hypocentral_separation=hypocentral_separation)
+ hypocentral_separation=hypocentral_separation,
+ num_threads=num_threads)
else:
peaks_out = decluster(
peaks=detect_vals, index=detect_times,
@@ -637,6 +647,7 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor',
template=template,
detections=[d for d in declustered_detections
if d.template_name == template_name]))
+            # TODO: might be better to modify the list of families in place.
self.families = new_families
return self
@@ -796,6 +807,12 @@ def read(self, filename=None, read_detection_catalog=True,
filenames = glob.glob(filename)
for _filename in filenames:
Logger.info(f"Reading from {_filename}")
+ # Cope with pickled files
+ if _filename.endswith('.pkl'):
+ with open(_filename, "rb") as _f:
+ chunk_party = pickle.load(_f)
+ self.__iadd__(chunk_party)
+ continue
with tarfile.open(_filename, "r:*") as arc:
temp_dir = tempfile.mkdtemp()
arc.extractall(path=temp_dir, members=_safemembers(arc))
@@ -833,8 +850,8 @@ def read(self, filename=None, read_detection_catalog=True,
return self
def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
- min_cc_from_mean_cc_factor=None,
- horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'],
+ min_cc_from_mean_cc_factor=None, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2'],
cores=1, interpolate=False, plot=False, plotdir=None,
parallel=True, process_cores=None, ignore_length=False,
ignore_bad_data=False, export_cc=False, cc_dir=None,
diff --git a/eqcorrscan/core/match_filter/template.py b/eqcorrscan/core/match_filter/template.py
index 8b792de90..599d27ed3 100644
--- a/eqcorrscan/core/match_filter/template.py
+++ b/eqcorrscan/core/match_filter/template.py
@@ -18,13 +18,15 @@
import shutil
import logging
+from typing import List, Set
+from functools import lru_cache
+
import numpy as np
from obspy import Stream
from obspy.core.event import Comment, Event, CreationInfo
from eqcorrscan.core.match_filter.helpers import _test_event_similarity
-from eqcorrscan.core.match_filter.matched_filter import (
- _group_detect, MatchFilterError)
+from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
from eqcorrscan.core import template_gen
Logger = logging.getLogger(__name__)
@@ -105,6 +107,15 @@ def __init__(self, name=None, st=None, lowcut=None, highcut=None,
author=getpass.getuser())))
self.event = event
+ @property
+ def _processing_parameters(self):
+ """
+ Internal function / attribute to return all processing parameters for
+ quick grouping of templates as tuple.
+ """
+ return (self.lowcut, self.highcut, self.samp_rate, self.filt_order,
+ self.process_length)
+
def __repr__(self):
"""
Print the template.
@@ -155,6 +166,17 @@ def __eq__(self, other, verbose=False, shallow_event_check=False):
>>> template_a == template_b
True
+ This should also cope with nan channels:
+ >>> import numpy as np
+ >>> template_c = template_a.copy()
+ >>> template_c.st[0].data = np.full(
+ ... template_c.st[0].stats.npts, np.nan)
+ >>> template_c == template_a
+ False
+ >>> template_d = template_c.copy()
+ >>> template_d == template_c
+ True
+
This will check all parameters of template including the data in the
stream.
@@ -199,7 +221,8 @@ def __eq__(self, other, verbose=False, shallow_event_check=False):
if self_is_stream and other_is_stream:
for tr, oth_tr in zip(self.st.sort(),
other.st.sort()):
- if not np.array_equal(tr.data, oth_tr.data):
+ if not np.array_equal(
+ tr.data, oth_tr.data, equal_nan=True):
if verbose:
print("Template data are not equal on "
"{0}".format(tr.id))
@@ -284,12 +307,9 @@ def same_processing(self, other):
>>> template_a.same_processing(template_b)
False
"""
- for key in self.__dict__.keys():
- if key in ['name', 'st', 'prepick', 'event', 'template_info']:
- continue
- if not self.__dict__[key] == other.__dict__[key]:
- return False
- return True
+ if self._processing_parameters == other._processing_parameters:
+ return True
+ return False
def write(self, filename, format='tar'):
"""
@@ -510,11 +530,12 @@ def detect(self, stream, threshold, threshold_type, trig_int,
.. Note::
See tutorials for example.
"""
+ from eqcorrscan.core.match_filter.tribe import Tribe
if kwargs.get("plotvar") is not None:
Logger.warning("plotvar is depreciated, use plot instead")
plot = kwargs.get("plotvar")
- party = _group_detect(
- templates=[self], stream=stream.copy(), threshold=threshold,
+ party = Tribe(templates=[self]).detect(
+ stream=stream, threshold=threshold,
threshold_type=threshold_type, trig_int=trig_int, plotdir=plotdir,
plot=plot, pre_processed=pre_processed, daylong=daylong,
parallel_process=parallel_process, xcorr_func=xcorr_func,
@@ -707,6 +728,180 @@ def group_templates(templates):
return template_groups
+def quick_group_templates(templates):
+ """
+ Group templates into sets of similarly processed templates.
+
+    :type templates: List or Tribe of Templates
+ :return: List of Lists of Templates.
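+
+    A minimal example sketch (not executed); `tribe` is a placeholder Tribe:
+
+    >>> groups = quick_group_templates(tribe.templates)  # doctest: +SKIP
+    >>> [len(group) for group in groups]  # doctest: +SKIP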
+ """
+ # Get the template's processing parameters
+ processing_tuples = [template._processing_parameters
+ for template in templates]
+ # Get list of unique parameter-tuples. Sort it so that the order in which
+ # the groups are processed is consistent across different runs.
+ uniq_processing_parameters = sorted(list(set(processing_tuples)))
+ # sort templates into groups
+ template_groups = []
+ for parameter_combination in uniq_processing_parameters:
+ # find indices of tuples in list with same parameters
+ template_indices_for_group = [
+ j for j, param_tuple in enumerate(processing_tuples)
+ if param_tuple == parameter_combination]
+
+ new_group = list()
+ for template_index in template_indices_for_group:
+ # use indices to sort templates into groups
+ new_group.append(templates[int(template_index)])
+ template_groups.append(new_group)
+ return template_groups
+
+
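The grouping above keys purely on the _processing_parameters tuple. A minimal, self-contained sketch of the same idea, using a stand-in class rather than an EQcorrscan Template (the attribute name below is illustrative only):

class FakeTemplate:
    """ Stand-in for Template; only carries a processing-parameter tuple. """
    def __init__(self, name, params):
        self.name = name
        self.params = params  # plays the role of _processing_parameters


templates = [FakeTemplate("a", (2.0, 10.0, 50.0, 4, 86400)),
             FakeTemplate("b", (2.0, 10.0, 50.0, 4, 86400)),
             FakeTemplate("c", (1.0, 15.0, 100.0, 4, 86400))]
unique_params = sorted({t.params for t in templates})
groups = [[t.name for t in templates if t.params == p]
          for p in unique_params]
assert groups == [["c"], ["a", "b"]]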
+def group_templates_by_seedid(
+ templates: List[Template],
+ st_seed_ids: Set[str],
+ group_size: int,
+) -> List[List[Template]]:
+ """
+ Group templates to reduce dissimilar traces
+
+ :param templates:
+ Templates to group together
+ :param st_seed_ids:
+ Seed ids in the stream to be matched with
+ :param group_size:
+ Maximum group size - will not exceed this size
+
+ :return:
+ List of lists of templates grouped.
+ """
+ all_template_sids = {tr.id for template in templates for tr in template.st}
+ if len(all_template_sids.intersection(st_seed_ids)) == 0:
+ Logger.warning(f"No matches between stream ({st_seed_ids} and "
+ f"templates ({all_template_sids}")
+ # Get overlapping seed ids so that we only group based on the
+ # channels we have in the data. Use a tuple so that it hashes
+ template_seed_ids = tuple(
+ (template.name, tuple(
+ {tr.id for tr in template.st}.intersection(st_seed_ids)))
+ for template in templates)
+ # Don't use templates that don't have any overlap with the stream
+ template_seed_ids = tuple(
+ (t_name, t_chans) for t_name, t_chans in template_seed_ids
+ if len(t_chans))
+ Logger.info(f"Dropping {len(templates) - len(template_seed_ids)} "
+ f"templates due to no matched channels")
+ # We will need this dictionary at the end for getting the templates by id
+ template_dict = {t.name: t for t in templates}
+ # group_size can be None, in which case we don't actually need to group
+ if (group_size is not None) and (group_size < len(template_seed_ids)):
+ # Pass off to cached function
+ out_groups = _group_seed_ids(
+ template_seed_ids=template_seed_ids, group_size=group_size)
+
+ # Convert from groups of template names to groups of templates
+ out_groups = [[template_dict[t] for t in out_group]
+ for out_group in out_groups]
+ else:
+ Logger.info(f"Group size ({group_size}) larger than n templates"
+ f" ({len(template_seed_ids)}), no grouping performed")
+ out_groups = [[template_dict[t[0]] for t in template_seed_ids]]
+ assert sum(len(grp) for grp in out_groups) == len(template_seed_ids), (
+ "Something went wrong internally with grouping - we don't have the "
+ "right number of templates. Please report this bug")
+ return out_groups
+
+
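A note on the data structure: template_seed_ids is built as a tuple of (name, tuple-of-seed-ids) pairs rather than a dict of sets because the cached helper below requires hashable arguments. A small illustration (the function and values here are made up):

from functools import lru_cache


@lru_cache(maxsize=2)
def cached_grouping(template_seed_ids, group_size):
    # Unpack the hashable tuples back into a dict of sets, as the real
    # helper does internally.
    return {name: set(sids) for name, sids in template_seed_ids}


hashable = (("tmpl_a", ("NZ.FOZ.10.HHZ",)),
            ("tmpl_b", ("NZ.WVZ.10.HHZ",)))
cached_grouping(hashable, 10)  # computed
cached_grouping(hashable, 10)  # served from the cache
assert cached_grouping.cache_info().hits == 1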
+@lru_cache(maxsize=2)
+def _group_seed_ids(template_seed_ids, group_size):
+ """ Cachable version of internals to avoid re-computing for every day. """
+ # Convert hashable tuple to dict for ease
+ template_seed_ids = {tup[0]: set(tup[1]) for tup in template_seed_ids}
+ # Get initial groups with matched traces - sort by length
+ sorted_templates = sorted(
+ template_seed_ids, key=lambda key: len(template_seed_ids[key]),
+ reverse=True)
+ # Group into matching sets of seed-ids - ideally all groups would have the
+ # same seed ids and no nan-traces
+ groups, group, group_sids, group_sid = (
+ [], [sorted_templates[0]], [], template_seed_ids[sorted_templates[0]])
+
+ for i in range(1, len(sorted_templates)):
+ # Check that we don't exceed the maximum group size first
+ if len(group) >= group_size or (
+ template_seed_ids[sorted_templates[i]] != group_sid):
+ groups.append(group)
+ group = [sorted_templates[i]]
+ group_sids.append(group_sid)
+ group_sid = template_seed_ids[sorted_templates[i]]
+ else:
+ group.append(sorted_templates[i])
+ # Get the final group
+ groups.append(group)
+ group_sids.append(group_sid)
+ Logger.info(f"{len(groups)} initial template groups")
+
+ # Check if all the groups are full
+ groups_full = sum([len(grp) == group_size for grp in groups])
+ if groups_full >= (len(template_seed_ids) // group_size) - 1:
+ return groups
+
+    # Merge groups together until the group-size condition is met.
+    # Compare ungrouped sids to the union of sids already in the group, add
+    # the most similar, then repeat. Start with the group with fewest sids.
+
+ n_original_groups = len(groups)
+ grouped = np.zeros(n_original_groups, dtype=bool)
+ # Use -1 as no group given
+ group_ids = np.ones(n_original_groups, dtype=int) * -1
+
+ # Sort groups by length of seed-ids
+ order = np.argsort([len(sid) for sid in group_sids])
+ groups = [groups[i] for i in order]
+ group_sids = [group_sids[i] for i in order]
+
+ # Assign shortest group to the zeroth group
+ grouped[0], group_id = 1, 0
+ group_ids[0] = group_id
+ group_sid = group_sids[0]
+ group_len = len(groups[0])
+ # Loop until all groups have been assigned a final group
+ Logger.info("Running similarity grouping")
+ while grouped.sum() < n_original_groups:
+ # Work out difference between groups
+ diffs = [(i, len(group_sid.symmetric_difference(other_sid)))
+ for i, other_sid in enumerate(group_sids) if not grouped[i]]
+ diffs.sort(key=lambda tup: tup[1])
+ closest_group_id = diffs[0][0]
+
+ if group_len + len(groups[closest_group_id]) > group_size:
+ # Max size reached, make new group
+ group_id += 1
+ # Take the next shortest ungrouped group
+ i = 0
+ while grouped[i]:
+ i += 1
+ group_sid = group_sids[i]
+ grouped[i] = 1
+ group_ids[i] = group_id
+ group_len = len(groups[i])
+ else:
+ # Add in closest
+ grouped[closest_group_id] = 1
+ group_ids[closest_group_id] = group_id
+ # Update the group seed ids to include the new ones
+ group_sid = group_sid.union(group_sids[closest_group_id])
+ group_len += len(groups[closest_group_id])
+ Logger.info("Completed grouping")
+
+ out_groups = []
+ for group_id in set(group_ids):
+ out_group = [t for i in range(n_original_groups)
+ if group_ids[i] == group_id for t in groups[i]]
+ out_groups.append(out_group)
+ return out_groups
+
+
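The merging loop above absorbs groups greedily by smallest symmetric difference of seed ids. A simplified, stand-alone illustration of that similarity measure (station names are arbitrary, not from any real deployment):

current = {"NZ.FOZ.10.HHZ", "NZ.WVZ.10.HHZ"}
candidates = {
    "close_group": {"NZ.FOZ.10.HHZ", "NZ.WVZ.10.HHZ", "NZ.JCZ.10.HHZ"},
    "far_group": {"NZ.ODZ.10.HHZ", "NZ.MQZ.10.HHZ"},
}
diffs = sorted(((name, len(current.symmetric_difference(sids)))
                for name, sids in candidates.items()),
               key=lambda tup: tup[1])
# close_group only adds one extra channel; far_group shares nothing (diff 4)
assert diffs[0] == ("close_group", 1)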
if __name__ == "__main__":
import doctest
diff --git a/eqcorrscan/core/match_filter/tribe.py b/eqcorrscan/core/match_filter/tribe.py
index f03bf77a4..e415d1897 100644
--- a/eqcorrscan/core/match_filter/tribe.py
+++ b/eqcorrscan/core/match_filter/tribe.py
@@ -15,23 +15,33 @@
import getpass
import glob
import os
+import pickle
import shutil
import tarfile
import tempfile
+import traceback
+import uuid
import logging
-import numpy as np
+from multiprocessing import Process, Queue, cpu_count
+from queue import Empty
+
from obspy import Catalog, Stream, read, read_events
from obspy.core.event import Comment, CreationInfo
-from eqcorrscan.core.match_filter.template import Template, group_templates
+from eqcorrscan.core import template_gen
+from eqcorrscan.core.match_filter.template import (
+ Template, quick_group_templates)
from eqcorrscan.core.match_filter.party import Party
+from eqcorrscan.core.match_filter.family import Family
from eqcorrscan.core.match_filter.helpers import (
- _safemembers, _par_read, get_waveform_client)
-from eqcorrscan.core.match_filter.matched_filter import (
- _group_detect, MatchFilterError)
-from eqcorrscan.core import template_gen
-from eqcorrscan.utils.pre_processing import _check_daylong
+ _safemembers, _par_read, get_waveform_client,
+ _remove_duplicates, _moveout)
+from eqcorrscan.core.match_filter.helpers.tribe import _wildcard_fill
+
+from eqcorrscan.utils.pre_processing import (
+ _quick_copy_stream, _prep_data_for_correlation)
+
Logger = logging.getLogger(__name__)
@@ -43,6 +53,7 @@ class Tribe(object):
:type templates: List of Template
:param templates: The templates within the Tribe.
"""
+ _timeout = 1
def __init__(self, templates=None):
self.templates = []
@@ -50,6 +61,11 @@ def __init__(self, templates=None):
templates = [templates]
if templates:
self.templates.extend(templates)
+ # Managers for Processes and Queues to be killed on errors
+ self._processes = dict()
+ self._queues = dict()
+ # Assign unique ids
+ self.__unique_ids()
def __repr__(self):
"""
@@ -93,12 +109,13 @@ def __iadd__(self, other):
>>> print(tribe)
Tribe of 3 templates
"""
+ assert isinstance(other, (Tribe, Template)), \
+ "Must be either Template or Tribe"
+ self.__unique_ids(other)
if isinstance(other, Tribe):
self.templates += other.templates
elif isinstance(other, Template):
self.templates.append(other)
- else:
- raise TypeError('Must be either Template or Tribe')
return self
def __eq__(self, other):
@@ -169,6 +186,8 @@ def __getitem__(self, index):
process length: None s
>>> tribe[0:2]
Tribe of 2 templates
+ >>> tribe["d"]
+ []
"""
if isinstance(index, slice):
return self.__class__(templates=self.templates.__getitem__(index))
@@ -183,6 +202,32 @@ def __getitem__(self, index):
Logger.warning('Template: %s not in tribe' % index)
return []
+ def __unique_ids(self, other=None):
+ """ Check that template names are unique. """
+ template_names = [t.name for t in self.templates]
+ if other:
+ assert isinstance(other, (Template, Tribe)), (
+ "Can only test against tribes or templates")
+ if isinstance(other, Template):
+ template_names.append(other.name)
+ else:
+ template_names.extend([t.name for t in other])
+
+ unique_names = set(template_names)
+ if len(unique_names) < len(template_names):
+ non_unique_names = [name for name in unique_names
+ if template_names.count(name) > 1]
+ raise NotImplementedError(
+ "Multiple templates found with the same name. Template names "
+ "must be unique. Non-unique templates: "
+ f"{', '.join(non_unique_names)}")
+ return
+
+ @property
+ def _stream_dir(self):
+ """ Location for temporary streams """
+ return f".streams_{os.getpid()}"
+
def sort(self):
"""
Sort the tribe, sorts by template name.
@@ -256,7 +301,9 @@ def copy(self):
>>> tribe_a == tribe_b
True
"""
- return copy.deepcopy(self)
+ # We can't copy processes, so we need to just copy the templates
+ other = Tribe(copy.deepcopy(self.templates))
+ return other
def write(self, filename, compress=True, catalog_format="QUAKEML"):
"""
@@ -281,6 +328,11 @@ def write(self, filename, compress=True, catalog_format="QUAKEML"):
>>> tribe = Tribe(templates=[Template(name='c', st=read())])
>>> tribe.write('test_tribe')
Tribe of 1 templates
+ >>> tribe.write(
+ ... "this_wont_work.bob",
+ ... catalog_format="BOB") # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ TypeError: BOB is not supported
"""
from eqcorrscan.core.match_filter import CAT_EXT_MAP
@@ -295,8 +347,9 @@ def write(self, filename, compress=True, catalog_format="QUAKEML"):
if t.event is not None:
# Check that the name in the comment matches the template name
for comment in t.event.comments:
- if comment.text and comment.text.startswith(
- "eqcorrscan_template_"):
+ if not comment.text:
+ comment.text = "eqcorrscan_template_{0}".format(t.name)
+ elif comment.text.startswith("eqcorrscan_template_"):
comment.text = "eqcorrscan_template_{0}".format(t.name)
tribe_cat.append(t.event)
if len(tribe_cat) > 0:
@@ -333,6 +386,29 @@ def _par_write(self, dirname):
parfile.write('\n')
return self
+ def _temporary_template_db(self, template_dir: str = None) -> dict:
+ """
+ Write a temporary template database of pickled templates to disk.
+
+ :param template_dir:
+ Directory to write to - if None will make a temporary directory.
+ """
+ # We use template names for filenames - check that these are unique
+ self.__unique_ids()
+ # Make sure that the template directory exists, or make a tempdir
+ if template_dir:
+ if not os.path.isdir(template_dir):
+ os.makedirs(template_dir)
+ else:
+ template_dir = tempfile.mkdtemp()
+ template_files = dict()
+ for template in self.templates:
+ t_file = os.path.join(template_dir, f"{template.name}.pkl")
+ with open(t_file, "wb") as f:
+ pickle.dump(template, f)
+ template_files.update({template.name: t_file})
+ return template_files
+
def read(self, filename):
"""
Read a tribe of templates from a tar formatted file.
@@ -348,13 +424,26 @@ def read(self, filename):
>>> tribe_back = Tribe().read('test_tribe.tgz')
>>> tribe_back == tribe
True
+ >>> # This can also read pickled templates
+ >>> import pickle
+ >>> with open("test_tribe.pkl", "wb") as f:
+ ... pickle.dump(tribe, f)
+ >>> tribe_back = Tribe().read("test_tribe.pkl")
+ >>> tribe_back == tribe
+ True
"""
+ if filename.endswith(".pkl"):
+ with open(filename, "rb") as f:
+ self.__iadd__(pickle.load(f))
+ return self
with tarfile.open(filename, "r:*") as arc:
temp_dir = tempfile.mkdtemp()
arc.extractall(path=temp_dir, members=_safemembers(arc))
tribe_dir = glob.glob(temp_dir + os.sep + '*')[0]
self._read_from_folder(dirname=tribe_dir)
shutil.rmtree(temp_dir)
+ # Assign unique ids
+ self.__unique_ids()
return self
def _read_from_folder(self, dirname):
@@ -384,14 +473,157 @@ def _read_from_folder(self, dirname):
if t.split(os.sep)[-1] == template.name + '.ms']
if len(t_file) == 0:
Logger.error('No waveform for template: ' + template.name)
- templates.remove(template)
continue
elif len(t_file) > 1:
Logger.warning('Multiple waveforms found, using: ' + t_file[0])
template.st = read(t_file[0])
+ # Remove templates that do not have streams
+ templates = [t for t in templates if t.st is not None]
self.templates.extend(templates)
return
+ def construct(self, method, lowcut, highcut, samp_rate, filt_order,
+ length, prepick, swin="all", process_len=86400,
+ all_horiz=False, delayed=True, plot=False, plotdir=None,
+ min_snr=None, parallel=False, num_cores=False,
+ skip_short_chans=False, save_progress=False, **kwargs):
+ """
+ Generate a Tribe of Templates.
+
+ :type method: str
+ :param method:
+ Method of Tribe generation. Possible options are: `from_client`,
+ `from_meta_file`. See below on the additional required arguments
+ for each method.
+ :type lowcut: float
+ :param lowcut:
+ Low cut (Hz), if set to None will not apply a lowcut
+ :type highcut: float
+ :param highcut:
+ High cut (Hz), if set to None will not apply a highcut.
+ :type samp_rate: float
+ :param samp_rate:
+ New sampling rate in Hz.
+ :type filt_order: int
+ :param filt_order:
+ Filter level (number of corners).
+ :type length: float
+ :param length: Length of template waveform in seconds.
+ :type prepick: float
+ :param prepick: Pre-pick time in seconds
+ :type swin: str
+ :param swin:
+ P, S, P_all, S_all or all, defaults to all: see note in
+ :func:`eqcorrscan.core.template_gen.template_gen`
+ :type process_len: int
+ :param process_len: Length of data in seconds to download and process.
+ :type all_horiz: bool
+ :param all_horiz:
+ To use both horizontal channels even if there is only a pick on
+ one of them. Defaults to False.
+ :type delayed: bool
+        :param delayed: If True, each channel will begin relative to its own
+            pick-time; if set to False, each channel will begin at the same
+            time.
+ :type plot: bool
+ :param plot: Plot templates or not.
+ :type plotdir: str
+ :param plotdir:
+ The path to save plots to. If `plotdir=None` (default) then the
+ figure will be shown on screen.
+ :type min_snr: float
+ :param min_snr:
+ Minimum signal-to-noise ratio for a channel to be included in the
+ template, where signal-to-noise ratio is calculated as the ratio
+ of the maximum amplitude in the template window to the rms
+ amplitude in the whole window given.
+ :type parallel: bool
+ :param parallel: Whether to process data in parallel or not.
+ :type num_cores: int
+ :param num_cores:
+ Number of cores to try and use, if False and parallel=True,
+ will use either all your cores, or as many traces as in the data
+ (whichever is smaller).
+ :type save_progress: bool
+ :param save_progress:
+ Whether to save the resulting template set at every data step or
+ not. Useful for long-running processes.
+ :type skip_short_chans: bool
+ :param skip_short_chans:
+ Whether to ignore channels that have insufficient length data or
+ not. Useful when the quality of data is not known, e.g. when
+ downloading old, possibly triggered data from a datacentre
+
+ .. note::
+ *Method specific arguments:*
+
+ - `from_client` requires:
+ :param str client_id:
+ string passable by obspy to generate Client, or any object
+ with a `get_waveforms` method, including a Client instance.
+ :param `obspy.core.event.Catalog` catalog:
+ Catalog of events to generate template for
+ :param float data_pad: Pad length for data-downloads in seconds
+ - `from_meta_file` requires:
+ :param str meta_file:
+ Path to obspy-readable event file, or an obspy Catalog
+ :param `obspy.core.stream.Stream` st:
+ Stream containing waveform data for template. Note that
+ this should be the same length of stream as you will use
+ for the continuous detection, e.g. if you detect in
+ day-long files, give this a day-long file!
+ :param bool process:
+ Whether to process the data or not, defaults to True.
+
+ .. Note::
+ Method: `from_sac` is not supported by Tribe.construct and must
+ use Template.construct.
+
+ .. Note:: Templates will be named according to their start-time.
+ """
+ templates, catalog, process_lengths = template_gen.template_gen(
+ method=method, lowcut=lowcut, highcut=highcut, length=length,
+ filt_order=filt_order, samp_rate=samp_rate, prepick=prepick,
+ return_event=True, save_progress=save_progress, swin=swin,
+ process_len=process_len, all_horiz=all_horiz, plotdir=plotdir,
+ delayed=delayed, plot=plot, min_snr=min_snr, parallel=parallel,
+ num_cores=num_cores, skip_short_chans=skip_short_chans,
+ **kwargs)
+ for template, event, process_len in zip(templates, catalog,
+ process_lengths):
+ t = Template()
+ if len(template) == 0:
+ Logger.error('Empty Template')
+ continue
+ t.st = template
+ t.name = template.sort(['starttime'])[0]. \
+ stats.starttime.strftime('%Y_%m_%dt%H_%M_%S')
+ t.lowcut = lowcut
+ t.highcut = highcut
+ t.filt_order = filt_order
+ t.samp_rate = samp_rate
+ t.process_length = process_len
+ t.prepick = prepick
+ event.comments.append(Comment(
+ text="eqcorrscan_template_" + t.name,
+ creation_info=CreationInfo(agency='eqcorrscan',
+ author=getpass.getuser())))
+ t.event = event
+ self.templates.append(t)
+ return self
+
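A hypothetical usage sketch for the relocated construct method; the file names and parameter values below are invented for illustration only, not recommendations:

from obspy import read, read_events

from eqcorrscan.core.match_filter.tribe import Tribe

st = read("my_daylong_data.ms")            # hypothetical day-long stream
cat = read_events("my_picked_events.xml")  # hypothetical picked catalog
tribe = Tribe().construct(
    method="from_meta_file", meta_file=cat, st=st, lowcut=2.0, highcut=9.0,
    samp_rate=25.0, filt_order=4, length=4.0, prepick=0.2, swin="all",
    process_len=86400, min_snr=3.0, parallel=True)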
+ def _template_channel_ids(self, wildcards: bool = False):
+ template_channel_ids = set()
+ for template in self.templates:
+ for tr in template.st:
+ # Cope with missing info and convert to wildcards
+ net, sta, loc, chan = tr.id.split('.')
+ if wildcards:
+ net, sta, loc, chan = _wildcard_fill(
+ net, sta, loc, chan)
+ template_channel_ids.add((net, sta, loc, chan))
+ return template_channel_ids
+
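The wildcard behaviour now lives in _wildcard_fill (imported at the top of this file). The sketch below approximates the logic that the old client_detect code, removed later in this diff, used when building wildcarded bulk requests; it is illustration only and not the real helper:

def wildcard_fill(net, sta, loc, chan):
    """ Approximation of the helper's behaviour; not the real function. """
    net = net if net not in (None, "") else "*"
    sta = sta if sta not in (None, "") else "*"
    loc = loc if loc not in (None, "") else "*"
    if chan in (None, ""):
        chan = "*"
    elif len(chan) == 2:
        # Two-character channel codes get a wildcarded middle character
        chan = chan[0] + "?" + chan[-1]
    return net, sta, loc, chan


assert wildcard_fill("NZ", "FOZ", "", "HZ") == ("NZ", "FOZ", "*", "H?Z")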
def cluster(self, method, **kwargs):
"""
Cluster the tribe.
@@ -404,10 +636,6 @@ def cluster(self, method, **kwargs):
Method of stacking, see :mod:`eqcorrscan.utils.clustering`
:return: List of tribes.
-
- .. rubric:: Example
-
-
"""
from eqcorrscan.utils import clustering
tribes = []
@@ -426,14 +654,18 @@ def cluster(self, method, **kwargs):
def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
plotdir=None, daylong=False, parallel_process=True,
xcorr_func=None, concurrency=None, cores=None,
- ignore_length=False, ignore_bad_data=False, group_size=None,
- overlap="calculate", full_peaks=False, save_progress=False,
- process_cores=None, pre_processed=False, **kwargs):
+ concurrent_processing=False, ignore_length=False,
+ ignore_bad_data=False, group_size=None, overlap="calculate",
+ full_peaks=False, save_progress=False, process_cores=None,
+ pre_processed=False, check_processing=True,
+ **kwargs):
"""
Detect using a Tribe of templates within a continuous stream.
- :type stream: `obspy.core.stream.Stream`
- :param stream: Continuous data to detect within using the Template.
+ :type stream: `Queue` or `obspy.core.stream.Stream`
+ :param stream:
+ Queue of streams of continuous data to detect within using the
+ Templates, or just the continuous data itself.
:type threshold: float
:param threshold:
Threshold level, if using `threshold_type='MAD'` then this will be
@@ -472,7 +704,12 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
- :param cores: Number of workers for procesisng and detection.
+ :param cores: Number of workers for processing and detection.
+ :type concurrent_processing: bool
+ :param concurrent_processing:
+            Whether to run steps in the detection workflow concurrently.
+ See https://github.com/eqcorrscan/EQcorrscan/pull/544 for
+ benchmarking.
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
@@ -496,7 +733,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
overlap = "calculate" will work out the appropriate overlap based
on the maximum lags within templates.
:type full_peaks: bool
- :param full_peaks: See `eqcorrscan.utils.findpeak.find_peaks2_short`
+ :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
:type save_progress: bool
:param save_progress:
Whether to save the resulting party at every data step or not.
@@ -509,6 +746,9 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
:param pre_processed:
Whether the stream has been pre-processed or not to match the
templates.
+ :type check_processing: bool
+ :param check_processing:
+ Whether to check that all templates were processed the same.
:return:
:class:`eqcorrscan.core.match_filter.Party` of Families of
@@ -583,45 +823,414 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
"""
- party = Party()
- template_groups = group_templates(self.templates)
- if len(template_groups) > 1 and pre_processed:
+ # Check that template names are unique
+ self.__unique_ids()
+        # We should not need to copy the stream; it is copied in chunks by
+        # _group_process
+
+ # Argument handling
+ if overlap is None:
+ overlap = 0.0
+ elif not isinstance(overlap, float) and str(overlap) == "calculate":
+ overlap = max(
+ _moveout(template.st) for template in self.templates)
+ elif not isinstance(overlap, float):
raise NotImplementedError(
- "Inconsistent template processing and pre-processed data - "
- "something is wrong!")
- # now we can compute the detections for each group
- for group in template_groups:
- group_party = _group_detect(
- templates=group, stream=stream.copy(), threshold=threshold,
- threshold_type=threshold_type, trig_int=trig_int,
- plot=plot, group_size=group_size, pre_processed=pre_processed,
- daylong=daylong, parallel_process=parallel_process,
- xcorr_func=xcorr_func, concurrency=concurrency, cores=cores,
- ignore_length=ignore_length, overlap=overlap, plotdir=plotdir,
- full_peaks=full_peaks, process_cores=process_cores,
- ignore_bad_data=ignore_bad_data, arg_check=False, **kwargs)
- party += group_party
- if save_progress:
- party.write("eqcorrscan_temporary_party")
+ "%s is not a recognised overlap type" % str(overlap))
+ assert overlap < self.templates[0].process_length, (
+ f"Overlap {overlap} must be less than process length "
+ f"{self.templates[0].process_length}")
+
+ # Copy because we need to muck around with them.
+ inner_kwargs = copy.copy(kwargs)
+
+ plot_format = inner_kwargs.pop("plot_format", "png")
+ export_cccsums = inner_kwargs.pop('export_cccsums', False)
+ peak_cores = inner_kwargs.pop('peak_cores',
+ process_cores) or cpu_count()
+ groups = inner_kwargs.pop("groups", None)
+
+        parallel = peak_cores != 1
+
+ if check_processing:
+ assert len(quick_group_templates(self.templates)) == 1, (
+ "Inconsistent template processing parameters found, this is no"
+ " longer supported. \nSplit your tribe using "
+ "eqcorrscan.core.match_filter.template.quick_group_templates "
+ "and re-run for each grouped tribe")
+ sampling_rate = self.templates[0].samp_rate
+ # Used for sanity checking seed id overlap
+ template_ids = set(
+ tr.id for template in self.templates for tr in template.st)
+
+ args = (stream, template_ids, pre_processed, parallel_process,
+ process_cores, daylong, ignore_length, overlap,
+ ignore_bad_data, group_size, groups, sampling_rate, threshold,
+ threshold_type, save_progress, xcorr_func, concurrency, cores,
+ export_cccsums, parallel, peak_cores, trig_int, full_peaks,
+ plot, plotdir, plot_format,)
+
+ if concurrent_processing:
+ party = self._detect_concurrent(*args, **inner_kwargs)
+ else:
+ party = self._detect_serial(*args, **inner_kwargs)
+
+ Logger.info("Ensuring all templates are in party")
+ additional_families = []
+ for template in self.templates:
+ if template.name in party._template_dict.keys():
+ continue
+ additional_families.append(
+ Family(template=template, detections=[]))
+ party.families.extend(additional_families)
+
+ # Post-process
if len(party) > 0:
- for family in party:
- if family is not None:
- family.detections = family._uniq().detections
+ Logger.info("Removing duplicates")
+ party = _remove_duplicates(party)
+ return party
+
+ def _detect_serial(
+ self, stream, template_ids, pre_processed, parallel_process,
+ process_cores, daylong, ignore_length, overlap, ignore_bad_data,
+ group_size, groups, sampling_rate, threshold, threshold_type,
+ save_progress, xcorr_func, concurrency, cores, export_cccsums,
+ parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format,
+ **kwargs
+ ):
+ """ Internal serial detect workflow. """
+ from eqcorrscan.core.match_filter.helpers.tribe import (
+ _pre_process, _group, _corr_and_peaks, _detect, _make_party)
+ party = Party()
+
+ assert isinstance(stream, Stream), (
+ f"Serial detection requires stream to be a stream, not"
+ f" a {type(stream)}")
+
+        # We need to copy data here to keep the user's input safe.
+ st_chunks = _pre_process(
+ st=stream.copy(), template_ids=template_ids,
+ pre_processed=pre_processed,
+ filt_order=self.templates[0].filt_order,
+ highcut=self.templates[0].highcut,
+ lowcut=self.templates[0].lowcut,
+ samp_rate=self.templates[0].samp_rate,
+ process_length=self.templates[0].process_length,
+ parallel=parallel_process, cores=process_cores, daylong=daylong,
+ ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
+ overlap=overlap, **kwargs)
+
+ chunk_files = []
+ for st_chunk in st_chunks:
+ starttime = st_chunk[0].stats.starttime
+ delta = st_chunk[0].stats.delta
+ template_groups = _group(
+ sids={tr.id for tr in st_chunk},
+ templates=self.templates, group_size=group_size, groups=groups)
+ for i, template_group in enumerate(template_groups):
+ templates = [_quick_copy_stream(t.st) for t in template_group]
+ template_names = [t.name for t in template_group]
+ Logger.info(
+ f"Prepping {len(templates)} templates for correlation")
+
+ # We need to copy the stream here.
+ _st, templates, template_names = _prep_data_for_correlation(
+ stream=_quick_copy_stream(st_chunk), templates=templates,
+ template_names=template_names)
+
+ all_peaks, thresholds, no_chans, chans = _corr_and_peaks(
+ templates=templates, template_names=template_names,
+ stream=_st, xcorr_func=xcorr_func, concurrency=concurrency,
+ cores=cores, i=i, export_cccsums=export_cccsums,
+ parallel=parallel, peak_cores=peak_cores,
+ threshold=threshold, threshold_type=threshold_type,
+ trig_int=trig_int, sampling_rate=sampling_rate,
+ full_peaks=full_peaks, plot=plot, plotdir=plotdir,
+ plot_format=plot_format, prepped=False, **kwargs)
+
+ detections = _detect(
+ template_names=template_names,
+ all_peaks=all_peaks, starttime=starttime,
+ delta=delta, no_chans=no_chans,
+ chans=chans, thresholds=thresholds)
+
+ chunk_file = _make_party(
+ detections=detections, threshold=threshold,
+ threshold_type=threshold_type,
+ templates=self.templates, chunk_start=starttime,
+ chunk_id=i, save_progress=save_progress)
+ chunk_files.append(chunk_file)
+ # Rebuild
+ for _chunk_file in chunk_files:
+ Logger.info(f"Adding party from {_chunk_file} to party")
+ with open(_chunk_file, "rb") as _f:
+ party += pickle.load(_f)
+ if not save_progress:
+ try:
+ os.remove(_chunk_file)
+ except FileNotFoundError:
+ pass
+ Logger.info(f"Added party from {_chunk_file}, party now "
+ f"contains {len(party)} detections")
+
+ if os.path.isdir(self._stream_dir):
+ shutil.rmtree(self._stream_dir)
+ return party
+
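The chunk-file rebuild above boils down to unpickling per-chunk Party files and summing them. A stand-alone sketch of that pattern with plain lists standing in for Party objects (paths here are temporary and invented):

import os
import pickle
import tempfile

chunk_files = []
for i, chunk in enumerate([["det_1"], ["det_2", "det_3"]]):
    fname = os.path.join(tempfile.gettempdir(), f"party_chunk_{i}.pkl")
    with open(fname, "wb") as f:
        pickle.dump(chunk, f)
    chunk_files.append(fname)

detections = []
for chunk_file in chunk_files:
    with open(chunk_file, "rb") as f:
        detections += pickle.load(f)  # Party supports += in the same way
    os.remove(chunk_file)             # mirrors save_progress=False
assert len(detections) == 3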
+ def _detect_concurrent(
+ self, stream, template_ids, pre_processed, parallel_process,
+ process_cores, daylong, ignore_length, overlap, ignore_bad_data,
+ group_size, groups, sampling_rate, threshold, threshold_type,
+ save_progress, xcorr_func, concurrency, cores, export_cccsums,
+ parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format,
+ **kwargs
+ ):
+ """ Internal concurrent detect workflow. """
+ from eqcorrscan.core.match_filter.helpers.processes import (
+ _pre_processor, _prepper, _make_detections, _check_for_poison,
+ _get_and_check, Poison)
+ from eqcorrscan.core.match_filter.helpers.tribe import _corr_and_peaks
+
+ if isinstance(stream, Stream):
+ Logger.info("Copying stream to keep your original data safe")
+ st_queue = Queue(maxsize=2)
+ st_queue.put(stream.copy())
+ # Close off queue
+ st_queue.put(None)
+ stream = st_queue
+ else:
+ # Note that if a queue has been passed we do not try to keep
+ # data safe
+ Logger.warning("Streams in queue will be edited in-place, you "
+ "should not re-use them")
+
+        # To reduce the cost of copying templates between processes we dump
+        # them to disk and pass a dictionary of filenames
+ template_dir = f".template_db_{uuid.uuid4()}"
+ template_db = self._temporary_template_db(template_dir)
+
+ # Set up processes and queues
+ poison_queue = kwargs.get('poison_queue', Queue())
+
+ if not pre_processed:
+ processed_stream_queue = Queue(maxsize=1)
+ else:
+ processed_stream_queue = stream
+
+ # Prepped queue contains templates and stream (and extras)
+ prepped_queue = Queue(maxsize=1)
+ # Output queues
+ peaks_queue = Queue()
+ party_file_queue = Queue()
+
+ # Set up processes
+ if not pre_processed:
+ pre_processor_process = Process(
+ target=_pre_processor,
+ kwargs=dict(
+ input_stream_queue=stream,
+ temp_stream_dir=self._stream_dir,
+ template_ids=template_ids,
+ pre_processed=pre_processed,
+ filt_order=self.templates[0].filt_order,
+ highcut=self.templates[0].highcut,
+ lowcut=self.templates[0].lowcut,
+ samp_rate=self.templates[0].samp_rate,
+ process_length=self.templates[0].process_length,
+ parallel=parallel_process,
+ cores=process_cores,
+ daylong=daylong,
+ ignore_length=ignore_length,
+ overlap=overlap,
+ ignore_bad_data=ignore_bad_data,
+ output_filename_queue=processed_stream_queue,
+ poison_queue=poison_queue,
+ ),
+ name="ProcessProcess"
+ )
+
+ prepper_process = Process(
+ target=_prepper,
+ kwargs=dict(
+ input_stream_filename_queue=processed_stream_queue,
+ group_size=group_size,
+ groups=groups,
+ templates=template_db,
+ output_queue=prepped_queue,
+ poison_queue=poison_queue,
+ xcorr_func=xcorr_func,
+ ),
+ name="PrepProcess"
+ )
+ detector_process = Process(
+ target=_make_detections,
+ kwargs=dict(
+ input_queue=peaks_queue,
+ delta=1 / sampling_rate,
+ templates=template_db,
+ threshold=threshold,
+ threshold_type=threshold_type,
+ save_progress=save_progress,
+ output_queue=party_file_queue,
+ poison_queue=poison_queue,
+ ),
+ name="DetectProcess"
+ )
+
+ # Cope with old tribes
+ if not hasattr(self, '_processes'):
+ self._processes = dict()
+ if not hasattr(self, '_queues'):
+ self._queues = dict()
+
+ # Put these processes into the namespace
+ self._processes.update({
+ "prepper": prepper_process,
+ "detector": detector_process,
+ })
+ self._queues.update({
+ "poison": poison_queue,
+ "stream": stream,
+ "prepped": prepped_queue,
+ "peaks": peaks_queue,
+ "party_file": party_file_queue,
+ })
+
+ if not pre_processed:
+ Logger.info("Starting preprocessor")
+ self._queues.update({"processed_stream": processed_stream_queue})
+ self._processes.update({"pre-processor": pre_processor_process})
+ pre_processor_process.start()
+
+ # Start your engines!
+ prepper_process.start()
+ detector_process.start()
+
+ # Loop over input streams and template groups
+ while True:
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ Logger.info("Killed in main loop")
+ break
+ try:
+ to_corr = _get_and_check(prepped_queue, poison_queue)
+ if to_corr is None:
+ Logger.info("Ran out of streams, exiting correlation")
+ break
+ elif isinstance(to_corr, Poison):
+ Logger.info("Killed in main loop")
+ break
+ starttime, i, stream, template_names, templates, \
+ *extras = to_corr
+ inner_kwargs = copy.copy(kwargs) # We will mangle them
+ # Correlation specific handling to reduce single-threaded time
+ if xcorr_func == "fmf":
+ weights, pads, chans, no_chans = extras
+ inner_kwargs.update({
+ 'weights': weights, 'pads': pads, "no_chans": no_chans,
+ "chans": chans, "prepped": True})
+ elif xcorr_func in (None, 'fftw'):
+ pads, seed_ids = extras
+ inner_kwargs.update({
+ "pads": pads, "seed_ids": seed_ids, "prepped": True})
+
+ Logger.info(f"Got stream of {len(stream)} channels")
+ Logger.info(f"Starting correlation from {starttime}")
+
+ all_peaks, thresholds, no_chans, chans = _corr_and_peaks(
+ templates=templates, template_names=template_names,
+ stream=stream, xcorr_func=xcorr_func,
+ concurrency=concurrency,
+ cores=cores, i=i, export_cccsums=export_cccsums,
+ parallel=parallel, peak_cores=peak_cores,
+ threshold=threshold, threshold_type=threshold_type,
+ trig_int=trig_int, sampling_rate=sampling_rate,
+ full_peaks=full_peaks, plot=plot, plotdir=plotdir,
+ plot_format=plot_format, **inner_kwargs
+ )
+ peaks_queue.put(
+ (starttime, all_peaks, thresholds, no_chans, chans,
+ template_names))
+ except Exception as e:
+ Logger.error(
+ f"Caught exception in correlator:\n {e}")
+ traceback.print_tb(e.__traceback__)
+ poison_queue.put(e)
+ break # We need to break in Main
+ i += 1
+ Logger.debug("Putting None into peaks queue.")
+ peaks_queue.put(None)
+
+ # Get the party back
+ Logger.info("Collecting party")
+ party = Party()
+ while True:
+ killed = _check_for_poison(poison_queue)
+ if killed:
+ Logger.error("Killed")
+ break
+ pf = _get_and_check(party_file_queue, poison_queue)
+ if pf is None:
+ # Fin - Queue has been finished with.
+ break
+ if isinstance(pf, Poison):
+ Logger.error("Killed while checking for party")
+ killed = True
+ break
+ with open(pf, "rb") as f:
+ party += pickle.load(f)
+ if not save_progress:
+ try:
+ os.remove(pf)
+ except FileNotFoundError:
+ pass
+
+ # Check for exceptions
+ if killed:
+ internal_error = poison_queue.get()
+ if isinstance(internal_error, Poison):
+ internal_error = internal_error.value
+ Logger.error(f"Raising error {internal_error} in main process")
+ # Now we can raise the error
+ if internal_error:
+ # Clean the template db
+ if os.path.isdir(template_dir):
+ shutil.rmtree(template_dir)
+ if os.path.isdir(self._stream_dir):
+ shutil.rmtree(self._stream_dir)
+ self._on_error(internal_error)
+
+ # Shut down the processes and close the queues
+ shutdown = kwargs.get("shutdown", True)
+ # Allow client_detect to take control
+ if shutdown:
+ Logger.info("Shutting down")
+ self._close_queues()
+ self._close_processes()
+ if os.path.isdir(template_dir):
+ shutil.rmtree(template_dir)
+ if os.path.isdir(self._stream_dir):
+ shutil.rmtree(self._stream_dir)
return party
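The concurrent workflow above is a fairly standard Process/Queue pipeline, with a None sentinel marking the end of the stream and a poison queue for error propagation. A generic, self-contained sketch of the sentinel pattern (not the EQcorrscan helpers themselves):

from multiprocessing import Process, Queue


def worker(in_q, out_q):
    while True:
        item = in_q.get()
        if item is None:      # sentinel: no more work
            out_q.put(None)
            break
        out_q.put(item * 2)   # stand-in for correlation work


if __name__ == "__main__":
    in_q, out_q = Queue(), Queue()
    proc = Process(target=worker, args=(in_q, out_q), name="WorkerProcess")
    proc.start()
    for chunk in (1, 2, 3):
        in_q.put(chunk)
    in_q.put(None)
    results = []
    while (item := out_q.get()) is not None:
        results.append(item)
    proc.join()
    assert results == [2, 4, 6]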
def client_detect(self, client, starttime, endtime, threshold,
threshold_type, trig_int, plot=False, plotdir=None,
min_gap=None, daylong=False, parallel_process=True,
xcorr_func=None, concurrency=None, cores=None,
- ignore_length=False, ignore_bad_data=False,
- group_size=None, return_stream=False, full_peaks=False,
+ concurrent_processing=False, ignore_length=False,
+ ignore_bad_data=False, group_size=None,
+ return_stream=False, full_peaks=False,
save_progress=False, process_cores=None, retries=3,
- **kwargs):
+ check_processing=True, **kwargs):
"""
Detect using a Tribe of templates within a continuous stream.
:type client: `obspy.clients.*.Client`
- :param client: Any obspy client with a dataselect service.
+ :param client:
+ Any obspy client (or client-like object) with a dataselect service.
:type starttime: :class:`obspy.core.UTCDateTime`
:param starttime: Start-time for detections.
:type endtime: :class:`obspy.core.UTCDateTime`
@@ -669,6 +1278,11 @@ def client_detect(self, client, starttime, endtime, threshold,
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
:param cores: Number of workers for processing and detection.
+ :type concurrent_processing: bool
+ :param concurrent_processing:
+            Whether to run steps in the detection workflow concurrently.
+ See https://github.com/eqcorrscan/EQcorrscan/pull/544 for
+ benchmarking.
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
@@ -756,7 +1370,9 @@ def client_detect(self, client, starttime, endtime, threshold,
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
"""
- from obspy.clients.fdsn.client import FDSNException
+ from eqcorrscan.core.match_filter.helpers.tribe import _download_st
+ from eqcorrscan.core.match_filter.helpers.processes import (
+ _get_detection_stream)
# This uses get_waveforms_bulk to get data - not all client types have
# this, so we check and monkey patch here.
@@ -767,290 +1383,192 @@ def client_detect(self, client, starttime, endtime, threshold,
"method, monkey-patching this")
client = get_waveform_client(client)
- party = Party()
+ if check_processing:
+ assert len(quick_group_templates(self.templates)) == 1, (
+ "Inconsistent template processing parameters found, this is no"
+ " longer supported. Split your tribe using "
+ "eqcorrscan.core.match_filter.template.quick_group_templates "
+ "and re-run for each group")
+
+ groups = kwargs.get("groups", None)
+
+        # Hard-coded buffer for downloading data; downloaded data are often
+        # not the correct length
buff = 300
- # Apply a buffer, often data downloaded is not the correct length
data_length = max([t.process_length for t in self.templates])
- pad = 0
- for template in self.templates:
- max_delay = (template.st.sort(['starttime'])[-1].stats.starttime -
- template.st.sort(['starttime'])[0].stats.starttime)
- if max_delay > pad:
- pad = max_delay
- download_groups = int(endtime - starttime) / data_length
- template_channel_ids = []
- for template in self.templates:
- for tr in template.st:
- if tr.stats.network not in [None, '']:
- chan_id = (tr.stats.network,)
- else:
- chan_id = ('*',)
- if tr.stats.station not in [None, '']:
- chan_id += (tr.stats.station,)
- else:
- chan_id += ('*',)
- if tr.stats.location not in [None, '']:
- chan_id += (tr.stats.location,)
- else:
- chan_id += ('*',)
- if tr.stats.channel not in [None, '']:
- if len(tr.stats.channel) == 2:
- chan_id += (tr.stats.channel[0] + '?' +
- tr.stats.channel[-1],)
- else:
- chan_id += (tr.stats.channel,)
- else:
- chan_id += ('*',)
- template_channel_ids.append(chan_id)
- template_channel_ids = list(set(template_channel_ids))
+
+ # Calculate overlap
+ overlap = max(_moveout(template.st) for template in self.templates)
+ assert overlap < data_length, (
+ f"Overlap of {overlap} s is larger than the length of data to "
+ f"be downloaded: {data_length} s - this won't work.")
+
+ # Work out start and end times of chunks to download
+ chunk_start, time_chunks = starttime, []
+ while chunk_start < endtime:
+ time_chunks.append((chunk_start, chunk_start + data_length + 20))
+ chunk_start += data_length - overlap
+
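A quick numerical check of the chunking loop above, with floats standing in for UTCDateTime objects (values arbitrary):

data_length, overlap = 86400.0, 600.0
starttime, endtime = 0.0, 2 * 86400.0
chunk_start, time_chunks = starttime, []
while chunk_start < endtime:
    time_chunks.append((chunk_start, chunk_start + data_length + 20))
    chunk_start += data_length - overlap
assert len(time_chunks) == 3 and time_chunks[1][0] == 85800.0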
+ full_stream_dir = None
if return_stream:
- stream = Stream()
- if int(download_groups) < download_groups:
- download_groups = int(download_groups) + 1
- else:
- download_groups = int(download_groups)
- for i in range(download_groups):
- bulk_info = []
- for chan_id in template_channel_ids:
- bulk_info.append((
- chan_id[0], chan_id[1], chan_id[2], chan_id[3],
- starttime + (i * data_length) - (pad + buff),
- starttime + ((i + 1) * data_length) + (pad + buff)))
- for retry_attempt in range(retries):
- try:
- Logger.info("Downloading data")
- st = client.get_waveforms_bulk(bulk_info)
- Logger.info(
- "Downloaded data for {0} traces".format(len(st)))
- break
- except FDSNException as e:
- if "Split the request in smaller" in " ".join(e.args):
- Logger.warning(
- "Datacentre does not support large requests: "
- "splitting request into smaller chunks")
- st = Stream()
- for _bulk in bulk_info:
- try:
- st += client.get_waveforms_bulk([_bulk])
- except Exception as e:
- Logger.error("No data for {0}".format(_bulk))
- Logger.error(e)
- continue
- Logger.info("Downloaded data for {0} traces".format(
- len(st)))
- break
- except Exception as e:
- Logger.error(e)
- continue
- else:
- raise MatchFilterError(
- "Could not download data after {0} attempts".format(
- retries))
- # Get gaps and remove traces as necessary
- if min_gap:
- gaps = st.get_gaps(min_gap=min_gap)
- if len(gaps) > 0:
- Logger.warning("Large gaps in downloaded data")
- st.merge()
- gappy_channels = list(
- set([(gap[0], gap[1], gap[2], gap[3])
- for gap in gaps]))
- _st = Stream()
- for tr in st:
- tr_stats = (tr.stats.network, tr.stats.station,
- tr.stats.location, tr.stats.channel)
- if tr_stats in gappy_channels:
- Logger.warning(
- "Removing gappy channel: {0}".format(tr))
- else:
- _st += tr
- st = _st
- st.split()
- st.detrend("simple").merge()
- st.trim(starttime=starttime + (i * data_length) - pad,
- endtime=starttime + ((i + 1) * data_length) + pad)
- for tr in st:
- if not _check_daylong(tr):
- st.remove(tr)
- Logger.warning(
- "{0} contains more zeros than non-zero, "
- "removed".format(tr.id))
- for tr in st:
- if tr.stats.endtime - tr.stats.starttime < \
- 0.8 * data_length:
- st.remove(tr)
- Logger.warning(
- "{0} is less than 80% of the required length"
- ", removed".format(tr.id))
+ full_stream_dir = tempfile.mkdtemp()
+
+ poison_queue = Queue()
+
+ detector_kwargs = dict(
+ threshold=threshold, threshold_type=threshold_type,
+ trig_int=trig_int, plot=plot, plotdir=plotdir,
+ daylong=daylong, parallel_process=parallel_process,
+ xcorr_func=xcorr_func, concurrency=concurrency, cores=cores,
+ ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
+ group_size=group_size, overlap=None, full_peaks=full_peaks,
+ process_cores=process_cores, save_progress=save_progress,
+ return_stream=return_stream, check_processing=False,
+ poison_queue=poison_queue, shutdown=False,
+ concurrent_processing=concurrent_processing, groups=groups)
+
+ if not concurrent_processing:
+ Logger.warning("Using concurrent_processing=True can be faster if"
+ "downloading your data takes a long time. See "
+ "https://github.com/eqcorrscan/EQcorrscan/pull/544"
+ "for benchmarks.")
+ party = Party()
if return_stream:
- stream += st
- try:
- party += self.detect(
- stream=st, threshold=threshold,
- threshold_type=threshold_type, trig_int=trig_int,
- plot=plot, plotdir=plotdir, daylong=daylong,
- parallel_process=parallel_process, xcorr_func=xcorr_func,
- concurrency=concurrency, cores=cores,
- ignore_length=ignore_length,
- ignore_bad_data=ignore_bad_data, group_size=group_size,
- overlap=None, full_peaks=full_peaks,
- process_cores=process_cores, **kwargs)
- if save_progress:
- party.write("eqcorrscan_temporary_party")
- except Exception as e:
- Logger.critical(
- 'Error, routine incomplete, returning incomplete Party')
- Logger.error('Error: {0}'.format(e))
+ full_st = Stream()
+ for _starttime, _endtime in time_chunks:
+ st = _download_st(
+ starttime=_starttime, endtime=_endtime,
+ buff=buff, min_gap=min_gap,
+ template_channel_ids=self._template_channel_ids(),
+ client=client, retries=retries)
+ if len(st) == 0:
+ Logger.warning(f"No suitable data between {_starttime} "
+ f"and {_endtime}, skipping")
+ continue
+ party += self.detect(stream=st, pre_processed=False,
+ **detector_kwargs)
if return_stream:
- return party, stream
- else:
- return party
- for family in party:
- if family is not None:
- family.detections = family._uniq().detections
- if return_stream:
- return party, stream
- else:
+ full_st += st
+ if return_stream:
+ return party, full_st
return party
- def construct(self, method, lowcut, highcut, samp_rate, filt_order,
- length, prepick, swin="all", process_len=86400,
- all_horiz=False, delayed=True, plot=False, plotdir=None,
- min_snr=None, parallel=False, num_cores=False,
- skip_short_chans=False, save_progress=False, **kwargs):
- """
- Generate a Tribe of Templates.
-
- :type method: str
- :param method:
- Method of Tribe generation. Possible options are: `from_client`,
- `from_meta_file`. See below on the additional required arguments
- for each method.
- :type lowcut: float
- :param lowcut:
- Low cut (Hz), if set to None will not apply a lowcut
- :type highcut: float
- :param highcut:
- High cut (Hz), if set to None will not apply a highcut.
- :type samp_rate: float
- :param samp_rate:
- New sampling rate in Hz.
- :type filt_order: int
- :param filt_order:
- Filter level (number of corners).
- :type length: float
- :param length: Length of template waveform in seconds.
- :type prepick: float
- :param prepick: Pre-pick time in seconds
- :type swin: str
- :param swin:
- P, S, P_all, S_all or all, defaults to all: see note in
- :func:`eqcorrscan.core.template_gen.template_gen`
- :type process_len: int
- :param process_len: Length of data in seconds to download and process.
- :type all_horiz: bool
- :param all_horiz:
- To use both horizontal channels even if there is only a pick on
- one of them. Defaults to False.
- :type delayed: bool
- :param delayed: If True, each channel will begin relative to it's own
- pick-time, if set to False, each channel will begin at the same
- time.
- :type plot: bool
- :param plot: Plot templates or not.
- :type plotdir: str
- :param plotdir:
- The path to save plots to. If `plotdir=None` (default) then the
- figure will be shown on screen.
- :type min_snr: float
- :param min_snr:
- Minimum signal-to-noise ratio for a channel to be included in the
- template, where signal-to-noise ratio is calculated as the ratio
- of the maximum amplitude in the template window to the rms
- amplitude in the whole window given.
- :type parallel: bool
- :param parallel: Whether to process data in parallel or not.
- :type num_cores: int
- :param num_cores:
- Number of cores to try and use, if False and parallel=True,
- will use either all your cores, or as many traces as in the data
- (whichever is smaller).
- :type save_progress: bool
- :param save_progress:
- Whether to save the resulting template set at every data step or
- not. Useful for long-running processes.
- :type skip_short_chans: bool
- :param skip_short_chans:
- Whether to ignore channels that have insufficient length data or
- not. Useful when the quality of data is not known, e.g. when
- downloading old, possibly triggered data from a datacentre
- :type save_progress: bool
- :param save_progress:
- Whether to save the resulting party at every data step or not.
- Useful for long-running processes.
-
- .. note::
- *Method specific arguments:*
+ # Get data in advance
+ time_queue = Queue()
+ stream_queue = Queue(maxsize=1)
+
+ downloader = Process(
+ target=_get_detection_stream,
+ kwargs=dict(
+ input_time_queue=time_queue,
+ client=client,
+ retries=retries,
+ min_gap=min_gap,
+ buff=buff,
+ output_filename_queue=stream_queue,
+ poison_queue=poison_queue,
+ temp_stream_dir=self._stream_dir,
+ full_stream_dir=full_stream_dir,
+ pre_process=True, parallel_process=parallel_process,
+ process_cores=process_cores, daylong=daylong,
+ overlap=0.0, ignore_length=ignore_length,
+ ignore_bad_data=ignore_bad_data,
+ filt_order=self.templates[0].filt_order,
+ highcut=self.templates[0].highcut,
+ lowcut=self.templates[0].lowcut,
+ samp_rate=self.templates[0].samp_rate,
+ process_length=self.templates[0].process_length,
+ template_channel_ids=self._template_channel_ids(),
+ ),
+ name="DownloadProcess"
+ )
+
+ # Cope with old tribes
+ if not hasattr(self, '_processes'):
+ self._processes = dict()
+ if not hasattr(self, '_queues'):
+ self._queues = dict()
+ # Put processes and queues into shared state
+ self._processes.update({
+ "downloader": downloader,
+ })
+ self._queues.update({
+ "times": time_queue,
+ "poison": poison_queue,
+ "stream": stream_queue,
+ })
+
+ # Fill time queue
+ for time_chunk in time_chunks:
+ time_queue.put(time_chunk)
+ # Close off queue
+ time_queue.put(None)
+
+ # Start up processes
+ downloader.start()
+
+ party = self.detect(
+ stream=stream_queue, pre_processed=True, **detector_kwargs)
+
+ # Close and join processes
+ self._close_processes()
+ self._close_queues()
- - `from_client` requires:
- :param str client_id:
- string passable by obspy to generate Client, or any object
- with a `get_waveforms` method, including a Client instance.
- :param `obspy.core.event.Catalog` catalog:
- Catalog of events to generate template for
- :param float data_pad: Pad length for data-downloads in seconds
- - `from_meta_file` requires:
- :param str meta_file:
- Path to obspy-readable event file, or an obspy Catalog
- :param `obspy.core.stream.Stream` st:
- Stream containing waveform data for template. Note that
- this should be the same length of stream as you will use
- for the continuous detection, e.g. if you detect in
- day-long files, give this a day-long file!
- :param bool process:
- Whether to process the data or not, defaults to True.
+ if return_stream:
+ full_st = read(os.path.join(full_stream_dir, "*"))
+ shutil.rmtree(full_stream_dir)
+ return party, full_st
+ return party
- .. Note::
- Method: `from_sac` is not supported by Tribe.construct and must
- use Template.construct.
+ def _close_processes(
+ self,
+ terminate: bool = False,
+ processes: dict = None
+ ):
+ processes = processes or self._processes
+ for p_name, p in processes.items():
+ if terminate:
+ Logger.warning(f"Terminating {p_name}")
+ p.terminate()
+ continue
+ try:
+ Logger.info(f"Joining {p_name}")
+ p.join(timeout=self._timeout)
+ except Exception as e:
+ Logger.error(f"Failed to join due to {e}: terminating")
+ p.terminate()
+ Logger.info(f"Closing {p_name}")
+ try:
+ p.close()
+ except Exception as e:
+ Logger.error(
+ f"Failed to close {p_name} due to {e}, terminating")
+ p.terminate()
+ Logger.info("Finished closing processes")
+ return
- .. Note:: Templates will be named according to their start-time.
- """
- templates, catalog, process_lengths = template_gen.template_gen(
- method=method, lowcut=lowcut, highcut=highcut, length=length,
- filt_order=filt_order, samp_rate=samp_rate, prepick=prepick,
- return_event=True, save_progress=save_progress, swin=swin,
- process_len=process_len, all_horiz=all_horiz, plotdir=plotdir,
- delayed=delayed, plot=plot, min_snr=min_snr, parallel=parallel,
- num_cores=num_cores, skip_short_chans=skip_short_chans,
- **kwargs)
- for template, event, process_len in zip(templates, catalog,
- process_lengths):
- t = Template()
- for tr in template:
- if not np.any(tr.data.astype(np.float16)):
- Logger.warning('Data are zero in float16, missing data,'
- ' will not use: {0}'.format(tr.id))
- template.remove(tr)
- if len(template) == 0:
- Logger.error('Empty Template')
+ def _close_queues(self, queues: dict = None):
+ queues = queues or self._queues
+ for q_name, q in queues.items():
+ if q._closed:
continue
- t.st = template
- t.name = template.sort(['starttime'])[0]. \
- stats.starttime.strftime('%Y_%m_%dt%H_%M_%S')
- t.lowcut = lowcut
- t.highcut = highcut
- t.filt_order = filt_order
- t.samp_rate = samp_rate
- t.process_length = process_len
- t.prepick = prepick
- event.comments.append(Comment(
- text="eqcorrscan_template_" + t.name,
- creation_info=CreationInfo(agency='eqcorrscan',
- author=getpass.getuser())))
- t.event = event
- self.templates.append(t)
- return self
+ Logger.info(f"Emptying {q_name}")
+ while True:
+ try:
+ q.get_nowait()
+ except Empty:
+ break
+ Logger.info(f"Closing {q_name}")
+ q.close()
+ Logger.info("Finished closing queues")
+ return
+
+ def _on_error(self, error):
+ """ Gracefully close all child processes and queues and raise error """
+ self._close_processes(terminate=True)
+ self._close_queues()
+ Logger.info("Admin complete, raising error")
+ raise error
def read_tribe(fname):
@@ -1070,7 +1588,7 @@ def read_tribe(fname):
doctest.testmod()
# List files to be removed after doctest
- cleanup = ['test_tribe.tgz']
+ cleanup = ['test_tribe.tgz', "test_tribe.pkl"]
for f in cleanup:
if os.path.isfile(f):
os.remove(f)
diff --git a/eqcorrscan/core/subspace.py b/eqcorrscan/core/subspace.py
index 649f9379d..13986dd01 100644
--- a/eqcorrscan/core/subspace.py
+++ b/eqcorrscan/core/subspace.py
@@ -14,7 +14,6 @@
"""
import numpy as np
import logging
-import time
import h5py
import getpass
import eqcorrscan
@@ -410,7 +409,7 @@ def read(self, filename):
self.name = f['data'].attrs['name'].decode('ascii')
return self
- def plot(self, stachans='all', size=(10, 7), show=True):
+ def plot(self, stachans='all', size=(10, 7), show=True, *args, **kwargs):
"""
Plot the output basis vectors for the detector at the given dimension.
@@ -435,7 +434,7 @@ def plot(self, stachans='all', size=(10, 7), show=True):
for example.
"""
return subspace_detector_plot(detector=self, stachans=stachans,
- size=size, show=show)
+ size=size, show=show, *args, **kwargs)
def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
@@ -684,7 +683,6 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate,
:return: List of delays
:rtype: list
"""
- from multiprocessing import Pool, cpu_count
processed_streams = []
if not stachans:
input_stachans = list(set([(tr.stats.station, tr.stats.channel)
@@ -701,33 +699,27 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate,
raise IOError(
'All channels of all streams must be the same length')
for st in streams:
- if not parallel:
- processed_stream = Stream()
- for stachan in input_stachans:
- dummy, tr = _internal_process(
- st=st, lowcut=lowcut, highcut=highcut,
- filt_order=filt_order, sampling_rate=sampling_rate,
- first_length=first_length, stachan=stachan)
+ processed = pre_processing.multi_process(
+ st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order,
+ samp_rate=sampling_rate, seisan_chan_names=False)
+ # Add in empty channels if needed and sort
+ processed_stream = Stream()
+ for stachan in input_stachans:
+ tr = processed.select(station=stachan[0], channel=stachan[1])
+ if len(tr) == 0:
+ tr = Trace(np.zeros(int(first_length * sampling_rate)))
+ tr.stats.station = stachan[0]
+ tr.stats.channel = stachan[1]
+ tr.stats.sampling_rate = sampling_rate
+ tr.stats.starttime = st[0].stats.starttime
+ # Do this to make more sensible plots
+ Logger.warning('Padding stream with zero trace for ' +
+ 'station ' + stachan[0] + '.' + stachan[1])
-                processed_stream += tr
- processed_streams.append(processed_stream)
- else:
- pool = Pool(processes=min(cores, cpu_count()))
- results = [pool.apply_async(
- _internal_process, (st,),
- {'lowcut': lowcut, 'highcut': highcut,
- 'filt_order': filt_order, 'sampling_rate': sampling_rate,
- 'first_length': first_length, 'stachan': stachan,
- 'i': i}) for i, stachan in enumerate(input_stachans)]
- pool.close()
- try:
- processed_stream = [p.get() for p in results]
- except KeyboardInterrupt as e: # pragma: no cover
- pool.terminate()
- raise e
- pool.join()
- processed_stream.sort(key=lambda tup: tup[0])
- processed_stream = Stream([p[1] for p in processed_stream])
- processed_streams.append(processed_stream)
+ processed_stream += tr
+ assert [(tr.stats.station, tr.stats.channel)
+ for tr in processed_stream] == input_stachans
+ processed_streams.append(processed_stream)
if no_missed and multiplex:
for tr in processed_stream:
if np.count_nonzero(tr.data) == 0:
@@ -769,30 +761,6 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate,
return output_streams, input_stachans
-def _internal_process(st, lowcut, highcut, filt_order, sampling_rate,
- first_length, stachan, i=0):
- tr = st.select(station=stachan[0], channel=stachan[1])
- if len(tr) == 0:
- tr = Trace(np.zeros(int(first_length * sampling_rate)))
- tr.stats.station = stachan[0]
- tr.stats.channel = stachan[1]
- tr.stats.sampling_rate = sampling_rate
- tr.stats.starttime = st[0].stats.starttime # Do this to make more
- # sensible plots
- Logger.warning('Padding stream with zero trace for ' +
- 'station ' + stachan[0] + '.' + stachan[1])
- elif len(tr) == 1:
- tr = tr[0]
- tr.detrend('simple')
- tr = pre_processing.process(
- tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order,
- samp_rate=sampling_rate, seisan_chan_names=False)
- else:
- raise IOError('Multiple channels for {0}.{1} in a single design '
- 'stream.'.format(stachan[0], stachan[1]))
- return i, tr
-
-
def read_detector(filename):
"""
Read detector from a filename.
@@ -833,8 +801,8 @@ def multi(stream):
"""
stack = stream[0].data
for tr in stream[1:]:
- stack = np.dstack(np.array([stack, tr.data]))
- multiplex = stack.reshape(stack.size, )
+ stack = np.vstack((stack, tr.data))
+ multiplex = stack.T.reshape(stack.size, )
return multiplex
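A quick numeric check of the new multiplexing: stacking traces with vstack and transposing before the reshape interleaves samples channel-by-channel, e.g. [a0, b0, a1, b1, ...] (values arbitrary):

import numpy as np

a, b = np.array([1, 2, 3]), np.array([10, 20, 30])
stack = np.vstack((a, b))
multiplex = stack.T.reshape(stack.size, )
assert np.array_equal(multiplex, np.array([1, 10, 2, 20, 3, 30]))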
diff --git a/eqcorrscan/core/template_gen.py b/eqcorrscan/core/template_gen.py
index b431d0ea3..7f361497c 100644
--- a/eqcorrscan/core/template_gen.py
+++ b/eqcorrscan/core/template_gen.py
@@ -17,6 +17,8 @@
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
+import warnings
+
import numpy as np
import logging
import os
@@ -27,7 +29,7 @@
from eqcorrscan.utils.sac_util import sactoevent
from eqcorrscan.utils import pre_processing
-from eqcorrscan.core import EQcorrscanDeprecationWarning
+# from eqcorrscan.core import EQcorrscanDeprecationWarning
Logger = logging.getLogger(__name__)
@@ -52,10 +54,11 @@ def __str__(self):
def template_gen(method, lowcut, highcut, samp_rate, filt_order,
length, prepick, swin="all", process_len=86400,
- all_horiz=False, delayed=True, plot=False, plotdir=None,
- return_event=False, min_snr=None, parallel=False,
- num_cores=False, save_progress=False, skip_short_chans=False,
- **kwargs):
+ all_vert=False, all_horiz=False, delayed=True, plot=False,
+ plotdir=None, return_event=False, min_snr=None,
+ parallel=False, num_cores=False, save_progress=False,
+ skip_short_chans=False, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2'], **kwargs):
"""
Generate processed and cut waveforms for use as templates.
@@ -82,6 +85,10 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
:func:`eqcorrscan.core.template_gen.template_gen`
:type process_len: int
:param process_len: Length of data in seconds to download and process.
+ :type all_vert: bool
+ :param all_vert:
+ To use all channels defined in vertical_chans for P-arrivals even if
+ there is only a pick on one of them. Defaults to False.
:type all_horiz: bool
:param all_horiz:
To use both horizontal channels even if there is only a pick on one of
@@ -119,18 +126,26 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
Whether to ignore channels that have insufficient length data or not.
Useful when the quality of data is not known, e.g. when downloading
old, possibly triggered data from a datacentre
+ :type vertical_chans: list
+ :param vertical_chans:
+ List of channel endings on which P-picks are accepted.
+ :type horizontal_chans: list
+ :param horizontal_chans:
+ List of channel endings for horizontal channels, on which S-picks are
+ accepted.
:returns: List of :class:`obspy.core.stream.Stream` Templates
:rtype: list
.. note::
By convention templates are generated with P-phases on the
- vertical channel and S-phases on the horizontal channels, normal
- seismograph naming conventions are assumed, where Z denotes vertical
- and N, E, R, T, 1 and 2 denote horizontal channels, either oriented
- or not. To this end we will **only** use Z channels if they have a
- P-pick, and will use one or other horizontal channels **only** if
- there is an S-pick on it.
+ vertical channel [can be multiple, e.g., Z (vertical) and H
+ (hydrophone) for an ocean bottom seismometer] and S-phases on the
+ horizontal channels. By default, normal seismograph naming conventions
+ are assumed, where Z denotes vertical and N, E, 1 and 2 denote
+ horizontal channels, either oriented or not. To this end we will
+ **only** use vertical channels if they have a P-pick, and will use one
+ or other horizontal channels **only** if there is an S-pick on it.
.. warning::
If there is no phase_hint included in picks, and swin=all, all channels
@@ -338,17 +353,13 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
if len(st) == 0:
Logger.info("No data")
continue
+ kwargs = dict(
+ st=st, lowcut=lowcut, highcut=highcut,
+ filt_order=filt_order, samp_rate=samp_rate,
+ parallel=parallel, num_cores=num_cores, daylong=daylong)
if daylong:
- st = pre_processing.dayproc(
- st=st, lowcut=lowcut, highcut=highcut,
- filt_order=filt_order, samp_rate=samp_rate,
- parallel=parallel, starttime=UTCDateTime(starttime),
- num_cores=num_cores)
- else:
- st = pre_processing.shortproc(
- st=st, lowcut=lowcut, highcut=highcut,
- filt_order=filt_order, parallel=parallel,
- samp_rate=samp_rate, num_cores=num_cores)
+ kwargs.update(dict(starttime=UTCDateTime(starttime)))
+ st = pre_processing.multi_process(**kwargs)
data_start = min([tr.stats.starttime for tr in st])
data_end = max([tr.stats.endtime for tr in st])
@@ -388,8 +399,9 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
# Cut and extract the templates
template = _template_gen(
event.picks, st, length, swin, prepick=prepick, plot=plot,
- all_horiz=all_horiz, delayed=delayed, min_snr=min_snr,
- plotdir=plotdir)
+ all_vert=all_vert, all_horiz=all_horiz, delayed=delayed,
+ min_snr=min_snr, vertical_chans=vertical_chans,
+ horizontal_chans=horizontal_chans, plotdir=plotdir)
process_lengths.append(len(st[0].data) / samp_rate)
temp_list.append(template)
catalog_out += event
@@ -471,7 +483,7 @@ def extract_from_stack(stack, template, length, pre_pick, pre_pad,
# Process the data if necessary
if not pre_processed:
- new_template = pre_processing.shortproc(
+ new_template = pre_processing.multi_process(
st=new_template, lowcut=lowcut, highcut=highcut,
filt_order=filt_order, samp_rate=samp_rate)
# Loop through the stack and trim!
@@ -560,7 +572,7 @@ def _download_from_client(client, client_type, catalog, data_pad, process_len,
"percent of the desired length, will not use".format(
tr.stats.station, tr.stats.channel,
(tr.stats.endtime - tr.stats.starttime) / 3600))
- elif not pre_processing._check_daylong(tr):
+ elif not pre_processing._check_daylong(tr.data):
Logger.warning(
"Data are mostly zeros, removing trace: {0}".format(tr.id))
else:
@@ -582,9 +594,10 @@ def _rms(array):
return np.sqrt(np.mean(np.square(array)))
-def _template_gen(picks, st, length, swin='all', prepick=0.05,
+def _template_gen(picks, st, length, swin='all', prepick=0.05, all_vert=False,
all_horiz=False, delayed=True, plot=False, min_snr=None,
- plotdir=None):
+ plotdir=None, vertical_chans=['Z'],
+ horizontal_chans=['E', 'N', '1', '2']):
"""
Master function to generate a multiplexed template for a single event.
@@ -607,6 +620,10 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
:param prepick:
Length in seconds to extract before the pick time default is 0.05
seconds.
+ :type all_vert: bool
+ :param all_vert:
+ To use all channels defined in vertical_chans for P-arrivals even if
+ there is only a pick on one of them. Defaults to False.
:type all_horiz: bool
:param all_horiz:
To use both horizontal channels even if there is only a pick on one
@@ -630,18 +647,26 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
:param plotdir:
The path to save plots to. If `plotdir=None` (default) then the figure
will be shown on screen.
+ :type vertical_chans: list
+ :param vertical_chans:
+ List of channel endings on which P-picks are accepted.
+ :type horizontal_chans: list
+ :param horizontal_chans:
+ List of channel endings for horizontal channels, on which S-picks are
+ accepted.
:returns: Newly cut template.
:rtype: :class:`obspy.core.stream.Stream`
.. note::
By convention templates are generated with P-phases on the
- vertical channel and S-phases on the horizontal channels, normal
- seismograph naming conventions are assumed, where Z denotes vertical
- and N, E, R, T, 1 and 2 denote horizontal channels, either oriented
- or not. To this end we will **only** use Z channels if they have a
- P-pick, and will use one or other horizontal channels **only** if
- there is an S-pick on it.
+ vertical channel [can be multiple, e.g., Z (vertical) and H
+ (hydrophone) for an ocean bottom seismometer] and S-phases on the
+ horizontal channels. By default, normal seismograph naming conventions
+ are assumed, where Z denotes vertical and N, E, 1 and 2 denote
+ horizontal channels, either oriented or not. To this end we will
+ **only** use vertical channels if they have a P-pick, and will use one
+ or other horizontal channels **only** if there is an S-pick on it.
.. note::
swin argument: Setting to `P` will return only data for channels
@@ -690,10 +715,14 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
for tr in st:
# Check that the data can be represented by float16, and check they
# are not all zeros
- if np.all(tr.data.astype(np.float16) == 0):
- Logger.error("Trace is all zeros at float16 level, either gain or "
- "check. Not using in template: {0}".format(tr))
- continue
+ # Catch RuntimeWarning for overflow in casting
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", category=RuntimeWarning)
+ if np.all(tr.data.astype(np.float16) == 0):
+ Logger.error(
+ "Trace is all zeros at float16 level, either gain or "
+ f"check. Not using in template: {tr}")
+ continue
st_copy += tr
st = st_copy
if len(st) == 0:
@@ -735,13 +764,18 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
continue
starttime.update({'picks': s_pick})
elif _swin == 'all':
- if all_horiz and tr.stats.channel[-1] in ['1', '2', '3',
- 'N', 'E']:
+ if all_vert and tr.stats.channel[-1] in vertical_chans:
+ # Get all picks on vertical channels
+ channel_pick = [
+ pick for pick in station_picks
+ if pick.waveform_id.channel_code[-1] in
+ vertical_chans]
+ elif all_horiz and tr.stats.channel[-1] in horizontal_chans:
# Get all picks on horizontal channels
channel_pick = [
pick for pick in station_picks
if pick.waveform_id.channel_code[-1] in
- ['1', '2', '3', 'N', 'E']]
+ horizontal_chans]
else:
channel_pick = [
pick for pick in station_picks
@@ -751,8 +785,11 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
starttime.update({'picks': channel_pick})
elif _swin == 'P':
p_pick = [pick for pick in station_picks
- if pick.phase_hint.upper()[0] == 'P' and
- pick.waveform_id.channel_code == tr.stats.channel]
+ if pick.phase_hint.upper()[0] == 'P']
+ if not all_vert:
+ p_pick = [pick for pick in p_pick
+ if pick.waveform_id.channel_code ==
+ tr.stats.channel]
if len(p_pick) == 0:
Logger.debug(
f"No picks with phase_hint P "
@@ -760,7 +797,7 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05,
continue
starttime.update({'picks': p_pick})
elif _swin == 'S':
- if tr.stats.channel[-1] in ['Z', 'U']:
+ if tr.stats.channel[-1] in vertical_chans:
continue
s_pick = [pick for pick in station_picks
if pick.phase_hint.upper()[0] == 'S']
@@ -864,6 +901,7 @@ def _group_events(catalog, process_len, template_length, data_pad):
:rtype: list
"""
# case for catalog only containing one event
+ assert len(catalog), "No events to group"
if len(catalog) == 1:
return [catalog]
sub_catalogs = []
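(Usage illustration for the new all_vert, vertical_chans and horizontal_chans arguments added above. This is a minimal sketch only: the catalog file, client id and filter values are assumptions chosen for the example, not values prescribed by this patch. The case shown is an ocean-bottom deployment where P-picks should be accepted on both Z and hydrophone (H) channel endings.)

from obspy import read_events
from eqcorrscan.core.template_gen import template_gen

catalog = read_events("picked_events.xml")  # hypothetical local catalog with picks

templates = template_gen(
    method="from_client", client_id="GEONET", catalog=catalog,
    lowcut=2.0, highcut=9.0, samp_rate=50.0, filt_order=4,
    length=3.0, prepick=0.15, swin="all", process_len=3600,
    all_vert=True, vertical_chans=["Z", "H"],
    all_horiz=True, horizontal_chans=["E", "N", "1", "2"])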
diff --git a/eqcorrscan/doc/RCET_logo_transparent.png b/eqcorrscan/doc/RCET_logo_transparent.png
new file mode 100644
index 000000000..1358c8430
Binary files /dev/null and b/eqcorrscan/doc/RCET_logo_transparent.png differ
diff --git a/eqcorrscan/doc/requirements.txt b/eqcorrscan/doc/requirements.txt
index 312a9f530..22a047738 100644
--- a/eqcorrscan/doc/requirements.txt
+++ b/eqcorrscan/doc/requirements.txt
@@ -1,4 +1,5 @@
matplotlib
+boto3
mock
# Note that mock is replaced by unittest.mock now, and we should update the test code to use this
nbsphinx
diff --git a/eqcorrscan/doc/submodules/utils.correlate.rst b/eqcorrscan/doc/submodules/utils.correlate.rst
index dd05c82c9..7b12a3905 100644
--- a/eqcorrscan/doc/submodules/utils.correlate.rst
+++ b/eqcorrscan/doc/submodules/utils.correlate.rst
@@ -143,13 +143,11 @@ for example:
>>> # do correlation using a custom function
>>> def custom_normxcorr(templates, stream, pads, *args, **kwargs):
... # Just to keep example short call other xcorr function
- ... print('calling custom xcorr function')
... return numpy_normxcorr(templates, stream, pads, *args, **kwargs)
>>> detections = match_filter(
... ['1'], [template], stream, .5, 'absolute', 1, False,
... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS
- calling custom xcorr function...
You can also use the set_xcorr object (eqcorrscan.utils.correlate.set_xcorr)
@@ -162,14 +160,12 @@ or within the scope of a context manager:
>>> with set_xcorr(custom_normxcorr):
... detections = match_filter(['1'], [template], stream, .5,
... 'absolute', 1, False) # doctest:+ELLIPSIS
- calling custom xcorr function...
>>> # permanently set the xcorr function (until the python kernel restarts)
>>> set_xcorr(custom_normxcorr)
default changed to custom_normxcorr
>>> detections = match_filter(['1'], [template], stream, .5, 'absolute',
... 1, False) # doctest:+ELLIPSIS
- calling custom xcorr function...
>>> set_xcorr.revert() # change it back to the previous state
diff --git a/eqcorrscan/doc/submodules/utils.pre_processing.rst b/eqcorrscan/doc/submodules/utils.pre_processing.rst
index 7fa90319a..eac1120d2 100644
--- a/eqcorrscan/doc/submodules/utils.pre_processing.rst
+++ b/eqcorrscan/doc/submodules/utils.pre_processing.rst
@@ -17,8 +17,6 @@ be padded after filtering if you decide not to use these routines (see notes in
:toctree: autogen
:nosignatures:
- dayproc
- process
- shortproc
+ multi_process
.. comment to end block
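(Since dayproc, process and shortproc are replaced by multi_process in the docs above, a minimal sketch of the equivalent calls may help. The waveform file and filter values are illustrative assumptions; the keyword names follow the multi_process calls used elsewhere in this patch.)

from obspy import read, UTCDateTime
from eqcorrscan.utils import pre_processing

st = read("continuous_data.ms")  # hypothetical waveform file

# Roughly equivalent to the old shortproc call:
st_short = pre_processing.multi_process(
    st=st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0,
    parallel=True, num_cores=4)

# Roughly equivalent to the old dayproc call (day-long data, fixed start time):
st_day = pre_processing.multi_process(
    st=st.copy(), lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0,
    parallel=True, num_cores=4, daylong=True,
    starttime=UTCDateTime(2023, 3, 17))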
diff --git a/eqcorrscan/doc/tutorial.rst b/eqcorrscan/doc/tutorial.rst
index 9c07affb3..c1a52555f 100644
--- a/eqcorrscan/doc/tutorial.rst
+++ b/eqcorrscan/doc/tutorial.rst
@@ -79,10 +79,9 @@ you want to see in the tutorials please let us know on github.
:titlesonly:
tutorials/quick_start.ipynb
- tutorials/matched-filter.rst
+ tutorials/matched-filter.ipynb
tutorials/template-creation.rst
tutorials/subspace.rst
- tutorials/lag-calc.rst
tutorials/mag-calc.rst
tutorials/clustering.rst
diff --git a/eqcorrscan/doc/tutorials/.gitignore b/eqcorrscan/doc/tutorials/.gitignore
new file mode 100644
index 000000000..97f77c09b
--- /dev/null
+++ b/eqcorrscan/doc/tutorials/.gitignore
@@ -0,0 +1 @@
+tutorial_waveforms
diff --git a/eqcorrscan/doc/tutorials/lag-calc.rst b/eqcorrscan/doc/tutorials/lag-calc.rst
deleted file mode 100644
index f38d102d3..000000000
--- a/eqcorrscan/doc/tutorials/lag-calc.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-Lag-time and pick correction
-============================
-
-The following is a work-in-progress tutorial for lag-calc functionality.
-
-An important note
------------------
-Picks generated by lag-calc are relative to the start of the template waveform,
-for example, if you generated your templates with a pre_pick of 0.2, you
-should expect picks to occur 0.2 seconds before the actual phase arrival.
-The result of this is that origin-times will be shifted by the same amount.
-
-If you have applied different pre_picks to different channels when generating
-template (currently not supported by any EQcorrscan functions), then picks
-generated here will not give the correct location.
-
-Advanced Example: Parkfield 2004
---------------------------------
-
-.. literalinclude:: ../../tutorials/lag_calc.py
diff --git a/eqcorrscan/doc/tutorials/matched-filter.ipynb b/eqcorrscan/doc/tutorials/matched-filter.ipynb
new file mode 100644
index 000000000..987c12188
--- /dev/null
+++ b/eqcorrscan/doc/tutorials/matched-filter.ipynb
@@ -0,0 +1,766 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Matched-filters\n",
+ "\n",
+ "This notebook will provide a look at using EQcorrscan's Tribe objects for matched-filter detection of earthquakes.\n",
+ "\n",
+ "This notebook extends on the ideas covered in the [Quick Start](quick_start.ipynb) notebook. In particular this\n",
+ "notebook also covers:\n",
+ "1. Concurrent execution of detection workflows for more efficient compute utilisation with large datasets;\n",
+ "2. Use of local waveform databases using [obsplus](https://github.com/niosh-mining/obsplus);\n",
+ "3. Cross-correlation pick-correction using the `lag_calc` method."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "96a39ad3defaeedc"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Set up\n",
+ "\n",
+ "We are going to focus in this notebook on using local data. For examples of how to directly use data from online providers\n",
+ "see the [Quick Start](quick_start.ipynb) notebook. \n",
+ "\n",
+ "To start off we will download some data - in your case this is likely data that you have either downloaded from one or more\n",
+ "online providers, or data that you have collected yourself. At the moment we don't care how those data are organised, as long\n",
+ "as you have continuous data on disk somewhere. We will use [obsplus](https://github.com/niosh-mining/obsplus) to work out\n",
+ "what data are were and provide us with the data that we need when we need it.\n",
+ "\n",
+ "Obsplus is great and has more functionality than we expose here - if you make use of obsplus, please cite the \n",
+ "paper by [Chambers et al., (2021)](https://joss.theoj.org/papers/10.21105/joss.02696).\n",
+ "\n",
+ "As in the [Quick Start](quick_start.ipynb) example, we will control the output level from EQcorrscan using logging."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "7c244daa3adde5ea"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "\n",
+ "logging.basicConfig(\n",
+ " level=logging.WARNING,\n",
+ " format=\"%(asctime)s\\t%(name)s\\t%(levelname)s\\t%(message)s\")\n",
+ "\n",
+ "Logger = logging.getLogger(\"TutorialLogger\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:18:30.192025542Z",
+ "start_time": "2023-12-01T00:18:30.187031966Z"
+ }
+ },
+ "id": "afb90fba397b3674"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We will use the March 2023 Kawarau swarm as our case-study for this. This was an energetic swarm that\n",
+ "was reported by New Zealand's GeoNet monitoring agency and discussed in a news article [here](https://www.geonet.org.nz/response/VJW80CGEPtq0JPCBHlNaR).\n",
+ "\n",
+ "We will use data from ten stations over a duration of two days. The swarm lasted longer than this, but\n",
+ "we need to limit compute resources for this tutorial! Feel free to change the end-date below to run\n",
+ "for longer. To be kind to GeoNet and not repeatedly get data from their FDSN service we are going to get data from the AWS open data bucket. If you don't already have boto3 installed you will need to install that for this sections (`conda install boto3` or `pip install boto3`).\n",
+ "\n",
+ "NB: If you actually want to access the GeoNet data bucket using Python, a drop-in replacement from FDSN clients exists [here](https://github.com/calum-chamberlain/cjc-utilities/blob/main/src/cjc_utilities/get_data/geonet_aws_client.py)"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "b4baffba897550b8"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "outputs": [],
+ "source": [
+ "def get_geonet_data(starttime, endtime, stations, outdir):\n",
+ " import os\n",
+ " import boto3\n",
+ " from botocore import UNSIGNED\n",
+ " from botocore.config import Config\n",
+ " \n",
+ " GEONET_AWS = \"geonet-open-data\"\n",
+ " \n",
+ " DAY_STRUCT = \"waveforms/miniseed/{date.year}/{date.year}.{date.julday:03d}\"\n",
+ " CHAN_STRUCT = (\"{station}.{network}/{date.year}.{date.julday:03d}.\"\n",
+ " \"{station}.{location}-{channel}.{network}.D\")\n",
+ " if not os.path.isdir(outdir):\n",
+ " os.makedirs(outdir)\n",
+ " \n",
+ " bob = boto3.resource('s3', config=Config(signature_version=UNSIGNED))\n",
+ " s3 = bob.Bucket(GEONET_AWS)\n",
+ " \n",
+ " date = starttime\n",
+ " while date < endtime:\n",
+ " day_path = DAY_STRUCT.format(date=date)\n",
+ " for station in stations:\n",
+ " for instrument in \"HE\":\n",
+ " for component in \"ZNE12\":\n",
+ " channel = f\"{instrument}H{component}\"\n",
+ " chan_path = CHAN_STRUCT.format(\n",
+ " station=station, network=\"NZ\",\n",
+ " date=date, location=\"10\", channel=channel)\n",
+ " local_path = os.path.join(outdir, chan_path)\n",
+ " if os.path.isfile(local_path):\n",
+ " Logger.info(f\"Skipping {local_path}: exists\")\n",
+ " continue\n",
+ " os.makedirs(os.path.dirname(local_path), exist_ok=True)\n",
+ " remote = \"/\".join([day_path, chan_path])\n",
+ " Logger.debug(f\"Downloading from {remote}\")\n",
+ " try:\n",
+ " s3.download_file(remote, local_path)\n",
+ " except Exception as e:\n",
+ " Logger.debug(f\"Could not download {remote} due to {e}\")\n",
+ " continue\n",
+ " Logger.info(f\"Downloaded {remote}\")\n",
+ " date += 86400"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:18:33.554168095Z",
+ "start_time": "2023-12-01T00:18:33.546942071Z"
+ }
+ },
+ "id": "a5e81f234705ab54"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "\n",
+ "from obspy import UTCDateTime\n",
+ "\n",
+ "starttime, endtime = UTCDateTime(2023, 3, 17), UTCDateTime(2023, 3, 19)\n",
+ "stations = ['EDRZ', 'LIRZ', 'MARZ', 'MKRZ', 'OMRZ', 'OPRZ', 'TARZ', 'WKHS', 'HNCZ', 'KARZ']\n",
+ "\n",
+ "outdir = \"tutorial_waveforms\"\n",
+ "\n",
+ "get_geonet_data(starttime=starttime, endtime=endtime, stations=stations, outdir=outdir)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:19:54.148732360Z",
+ "start_time": "2023-12-01T00:18:34.938266088Z"
+ }
+ },
+ "id": "a4182117cbf6692c"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Great, now we have some data. EQcorrscan is well set up to use clients for data access,\n",
+ "using clients allows EQcorrscan to request the data that it needs and take care of \n",
+ "overlapping chunks of data to ensure that no data are missed: network-based\n",
+ "matched-filters apply a delay-and-stack step to the correlations from individual\n",
+ "channels. This increases the signal-to-noise ratio of the correlation sum. However,\n",
+ "because of the delay part, the stacks made at start and end of chunks of waveform\n",
+ "data do not use the full network. To get around this *you should overlap your data*.\n",
+ "\n",
+ "If you use client-based access to data, EQcorrscan will take care of this for you.\n",
+ "\n",
+ "So how do you use clients for local data? Make a local database using obsplus.\n",
+ "\n",
+ "If you don't have obsplus installed you should install it now (`conda install obsplus`\n",
+ "or `pip install obsplus`)."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "864c0532837b9fc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " network station location channel starttime \\\n0 NZ EDRZ 10 EHE 2023-03-17 00:00:03.528394 \n1 NZ EDRZ 10 EHN 2023-03-17 00:00:05.458394 \n2 NZ EDRZ 10 EHZ 2023-03-17 00:00:03.528394 \n3 NZ KARZ 10 EHE 2023-03-17 00:00:02.963130 \n4 NZ KARZ 10 EHN 2023-03-17 00:00:00.093130 \n5 NZ KARZ 10 EHZ 2023-03-17 00:00:05.823130 \n6 NZ LIRZ 10 EHE 2023-03-17 00:00:01.753132 \n7 NZ LIRZ 10 EHN 2023-03-17 00:00:02.913132 \n8 NZ LIRZ 10 EHZ 2023-03-17 00:00:01.463132 \n9 NZ MARZ 10 EHE 2023-03-17 00:00:01.553130 \n10 NZ MARZ 10 EHN 2023-03-17 00:00:01.683130 \n11 NZ MARZ 10 EHZ 2023-03-17 00:00:00.963130 \n12 NZ MKRZ 10 EHE 2023-03-17 00:00:01.673129 \n13 NZ MKRZ 10 EHN 2023-03-17 00:00:00.143129 \n14 NZ MKRZ 10 EHZ 2023-03-17 00:00:00.053129 \n15 NZ OMRZ 10 EHE 2023-03-17 00:00:02.740000 \n16 NZ OMRZ 10 EHN 2023-03-17 00:00:00.580000 \n17 NZ OMRZ 10 EHZ 2023-03-17 00:00:04.110000 \n18 NZ OPRZ 10 HHE 2023-03-17 00:00:02.993132 \n19 NZ OPRZ 10 HHN 2023-03-17 00:00:03.473132 \n20 NZ OPRZ 10 HHZ 2023-03-17 00:00:01.963132 \n21 NZ TARZ 10 EHE 2023-03-17 00:00:01.850000 \n22 NZ TARZ 10 EHN 2023-03-17 00:00:00.760000 \n23 NZ TARZ 10 EHZ 2023-03-17 00:00:00.630000 \n\n endtime \n0 2023-03-19 00:00:00.098393 \n1 2023-03-19 00:00:04.518393 \n2 2023-03-19 00:00:03.588393 \n3 2023-03-19 00:00:01.273126 \n4 2023-03-19 00:00:00.303126 \n5 2023-03-19 00:00:03.653126 \n6 2023-03-19 00:00:03.523130 \n7 2023-03-19 00:00:04.253130 \n8 2023-03-19 00:00:00.313130 \n9 2023-03-19 00:00:01.593131 \n10 2023-03-19 00:00:04.163131 \n11 2023-03-19 00:00:05.063131 \n12 2023-03-19 00:00:01.763133 \n13 2023-03-19 00:00:02.463133 \n14 2023-03-19 00:00:02.363133 \n15 2023-03-19 00:00:02.470000 \n16 2023-03-19 00:00:00.550000 \n17 2023-03-19 00:00:03.820000 \n18 2023-03-19 00:00:01.243131 \n19 2023-03-19 00:00:04.443131 \n20 2023-03-19 00:00:00.143131 \n21 2023-03-19 00:00:01.580000 \n22 2023-03-19 00:00:00.820000 \n23 2023-03-19 00:00:03.830000 ",
+ "text/html": "
\n\n
\n \n
\n
\n
network
\n
station
\n
location
\n
channel
\n
starttime
\n
endtime
\n
\n \n \n
\n
0
\n
NZ
\n
EDRZ
\n
10
\n
EHE
\n
2023-03-17 00:00:03.528394
\n
2023-03-19 00:00:00.098393
\n
\n
\n
1
\n
NZ
\n
EDRZ
\n
10
\n
EHN
\n
2023-03-17 00:00:05.458394
\n
2023-03-19 00:00:04.518393
\n
\n
\n
2
\n
NZ
\n
EDRZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:03.528394
\n
2023-03-19 00:00:03.588393
\n
\n
\n
3
\n
NZ
\n
KARZ
\n
10
\n
EHE
\n
2023-03-17 00:00:02.963130
\n
2023-03-19 00:00:01.273126
\n
\n
\n
4
\n
NZ
\n
KARZ
\n
10
\n
EHN
\n
2023-03-17 00:00:00.093130
\n
2023-03-19 00:00:00.303126
\n
\n
\n
5
\n
NZ
\n
KARZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:05.823130
\n
2023-03-19 00:00:03.653126
\n
\n
\n
6
\n
NZ
\n
LIRZ
\n
10
\n
EHE
\n
2023-03-17 00:00:01.753132
\n
2023-03-19 00:00:03.523130
\n
\n
\n
7
\n
NZ
\n
LIRZ
\n
10
\n
EHN
\n
2023-03-17 00:00:02.913132
\n
2023-03-19 00:00:04.253130
\n
\n
\n
8
\n
NZ
\n
LIRZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:01.463132
\n
2023-03-19 00:00:00.313130
\n
\n
\n
9
\n
NZ
\n
MARZ
\n
10
\n
EHE
\n
2023-03-17 00:00:01.553130
\n
2023-03-19 00:00:01.593131
\n
\n
\n
10
\n
NZ
\n
MARZ
\n
10
\n
EHN
\n
2023-03-17 00:00:01.683130
\n
2023-03-19 00:00:04.163131
\n
\n
\n
11
\n
NZ
\n
MARZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:00.963130
\n
2023-03-19 00:00:05.063131
\n
\n
\n
12
\n
NZ
\n
MKRZ
\n
10
\n
EHE
\n
2023-03-17 00:00:01.673129
\n
2023-03-19 00:00:01.763133
\n
\n
\n
13
\n
NZ
\n
MKRZ
\n
10
\n
EHN
\n
2023-03-17 00:00:00.143129
\n
2023-03-19 00:00:02.463133
\n
\n
\n
14
\n
NZ
\n
MKRZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:00.053129
\n
2023-03-19 00:00:02.363133
\n
\n
\n
15
\n
NZ
\n
OMRZ
\n
10
\n
EHE
\n
2023-03-17 00:00:02.740000
\n
2023-03-19 00:00:02.470000
\n
\n
\n
16
\n
NZ
\n
OMRZ
\n
10
\n
EHN
\n
2023-03-17 00:00:00.580000
\n
2023-03-19 00:00:00.550000
\n
\n
\n
17
\n
NZ
\n
OMRZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:04.110000
\n
2023-03-19 00:00:03.820000
\n
\n
\n
18
\n
NZ
\n
OPRZ
\n
10
\n
HHE
\n
2023-03-17 00:00:02.993132
\n
2023-03-19 00:00:01.243131
\n
\n
\n
19
\n
NZ
\n
OPRZ
\n
10
\n
HHN
\n
2023-03-17 00:00:03.473132
\n
2023-03-19 00:00:04.443131
\n
\n
\n
20
\n
NZ
\n
OPRZ
\n
10
\n
HHZ
\n
2023-03-17 00:00:01.963132
\n
2023-03-19 00:00:00.143131
\n
\n
\n
21
\n
NZ
\n
TARZ
\n
10
\n
EHE
\n
2023-03-17 00:00:01.850000
\n
2023-03-19 00:00:01.580000
\n
\n
\n
22
\n
NZ
\n
TARZ
\n
10
\n
EHN
\n
2023-03-17 00:00:00.760000
\n
2023-03-19 00:00:00.820000
\n
\n
\n
23
\n
NZ
\n
TARZ
\n
10
\n
EHZ
\n
2023-03-17 00:00:00.630000
\n
2023-03-19 00:00:03.830000
\n
\n \n
\n
"
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from obsplus import WaveBank\n",
+ "\n",
+ "bank = WaveBank(outdir)\n",
+ "\n",
+ "bank.get_availability_df()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:19:56.010304625Z",
+ "start_time": "2023-12-01T00:19:54.151398148Z"
+ }
+ },
+ "id": "422d39dd855950a6"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Obsplus has now scanned the waveforms that we just downloaded and made a table\n",
+ "of what is there. Great. These `WaveBank` objects have a similar api to obspy\n",
+ "`Client` objects, so we can use them as a drop-in replacement.\n",
+ "\n",
+ "Now we are nearly ready to make some templates.\n",
+ "\n",
+ "## Template creation\n",
+ "\n",
+ "To make templates you need two things:\n",
+ "1. Continuous waveform data;\n",
+ "2. A catalogue of events with picks.\n",
+ "\n",
+ "We already have (1). For (2), the catalogue of events, we could use GeoNet picked\n",
+ "events, however if you have events with picks locally and want to use those\n",
+ "events as templates you should save those events in a format readable by obspy.\n",
+ "You can then skip ahead to read those picks back in.\n",
+ "\n",
+ "In the worst case scenario you have times that you know that you want your\n",
+ "template to start at, but they are not in any standard format readable by obspy,\n",
+ "you can construct events from scratch as below. Note in this example I am just\n",
+ "populating the picks as this is all we need. You do need to be careful about\n",
+ "the `waveform_id`: this should match the seed id of the continuous data\n",
+ "exactly, otherwise the picks will not be used."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "9f4dd2504fb18202"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "outputs": [],
+ "source": [
+ "from obspy.core.event import (\n",
+ " Catalog, Event, Pick, WaveformStreamID)\n",
+ "from obspy import UTCDateTime\n",
+ " \n",
+ "# Make the picks for the event:\n",
+ "picks = [\n",
+ " Pick(\n",
+ " time=UTCDateTime(2023, 3, 18, 7, 46, 15, 593125),\n",
+ "\t waveform_id=WaveformStreamID(\n",
+ " network_code='NZ', station_code='MARZ', \n",
+ " channel_code='EHZ', location_code='10'),\n",
+ " phase_hint='P'),\n",
+ " Pick(\n",
+ " time=UTCDateTime(2023, 3, 18, 7, 46, 17, 633115),\n",
+ "\t waveform_id=WaveformStreamID(\n",
+ " network_code='NZ', station_code='MKRZ', \n",
+ " channel_code='EHZ', location_code='10'),\n",
+ " phase_hint='P'),\n",
+ " Pick(\n",
+ " time=UTCDateTime(2023, 3, 18, 7, 46, 18, 110000),\n",
+ "\t waveform_id=WaveformStreamID(\n",
+ " network_code='NZ', station_code='OMRZ', \n",
+ " channel_code='EHZ', location_code='10'),\n",
+ " phase_hint='P'),\n",
+ "] \n",
+ "# Add as many picks as you have - you might want to loop \n",
+ "# and/or make a function to pasre your picks to obspy Picks.\n",
+ "\n",
+ "# Make the event\n",
+ "event = Event(picks=picks)\n",
+ "# Make the catalog\n",
+ "catalog = Catalog([event])"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:19:56.010572545Z",
+ "start_time": "2023-12-01T00:19:56.004559606Z"
+ }
+ },
+ "id": "cf480f12b889f227"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "For this example we are going to use a catalogue of events picked by GeoNet - we will download those data and write them to disk to mimic you using local files:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "d482d389c5260ca6"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "outputs": [],
+ "source": [
+ "from obspy.clients.fdsn import Client\n",
+ "\n",
+ "client = Client(\"GEONET\")\n",
+ "\n",
+ "cat = client.get_events(\n",
+ " starttime=UTCDateTime(2023, 3, 17),\n",
+ " endtime=UTCDateTime(2023, 3, 19),\n",
+ " latitude=-38.05, longitude=176.73, \n",
+ " maxradius=0.5, minmagnitude=3.0) # Limited set of relevent events\n",
+ "\n",
+ "cat.write(\"tutorial_catalog.xml\", format=\"QUAKEML\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:20:11.091149707Z",
+ "start_time": "2023-12-01T00:19:56.007304525Z"
+ }
+ },
+ "id": "70ec5a6c46c54884"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Template creation with local files\n",
+ "\n",
+ "Now that we have the events and waveforms we need, we can make our Tribe of templates.\n",
+ "\n",
+ "First we have to read in the events that we want to use as templates:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "2d8590e8fd52ed0c"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "49 Event(s) in Catalog:\n",
+ "2023-03-17T14:29:34.921582Z | -38.067, +176.689 | 3.40 MLv | manual\n",
+ "2023-03-17T14:56:17.215087Z | -38.061, +176.679 | 3.07 MLv | manual\n",
+ "...\n",
+ "2023-03-18T20:20:52.842474Z | -38.045, +176.734 | 3.17 MLv | manual\n",
+ "2023-03-18T21:42:39.943071Z | -38.051, +176.735 | 4.25 MLv | manual\n",
+ "To see all events call 'print(CatalogObject.__str__(print_all=True))'\n"
+ ]
+ }
+ ],
+ "source": [
+ "from obspy import read_events\n",
+ "\n",
+ "cat = read_events(\"tutorial_catalog.xml\")\n",
+ "print(cat)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:20:40.878853704Z",
+ "start_time": "2023-12-01T00:20:28.064446183Z"
+ }
+ },
+ "id": "bfce77794a5d15f8"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Pick curation\n",
+ "\n",
+ "You may want to limit what picks you actually use for your templates. Any picks that you provide will\n",
+ "be used for cutting waveforms - this may include amplitude picks! You should not need to restrict\n",
+ "what stations you have picks for, but it doesn't do any harm to.\n",
+ "\n",
+ "Below we select picks from the stations that we set earlier, and only P and S picks. We also limit\n",
+ "to only one P and one S pick per station - you may not want to do that, but it can get messy if you\n",
+ "have multiple picks of the same phase."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "d119b8c9c37581f5"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Matched-filter GPU is not compiled! Should be here: /home/chambeca/my_programs/Building/fast_matched_filter/fast_matched_filter/lib/matched_filter_GPU.so\n"
+ ]
+ }
+ ],
+ "source": [
+ "from eqcorrscan.utils.catalog_utils import filter_picks\n",
+ "\n",
+ "cat = filter_picks(\n",
+ " cat, \n",
+ " stations=stations, \n",
+ " phase_hints=[\"P\", \"S\"], \n",
+ " enforce_single_pick=\"earliest\") "
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:20:45.250392572Z",
+ "start_time": "2023-12-01T00:20:42.068040579Z"
+ }
+ },
+ "id": "7d9ac720ccf4aa2f"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Template creation decisions\n",
+ "\n",
+ "We now have everything needed to create a tribe of templates. At this point you\n",
+ "have to make some decisions about parameters:\n",
+ "1. What filters to use;\n",
+ "2. What sampling-rate to use;\n",
+ "3. How long your template waveforms should be;\n",
+ "4. When to start your template waveforms relative to your pick times;\n",
+ "5. Whether to use separate P and S windows or not.\n",
+ "\n",
+ "Your choices for 2 and 3 should be based somewhat on your choice of what filters \n",
+ "to use (1). There is little point in using a sampling-rate significantly above\n",
+ "2.5x your high-cut frequency (2.5x because off the roll-offs used in the\n",
+ "Butterworth filters used by EQcorrscan). Lower sampling-rates will result in\n",
+ "fewer correlations and hence faster compute time, but most of the time in EQcorrscan's\n",
+ "matched-filter runs is spent in the pre-processing of the data rather than the\n",
+ "actual correlation computation.\n",
+ "\n",
+ "When deciding on filtering parameters you may find it helpful to look at what\n",
+ "frequencies have the best signal-to-noise ratio. There are functions in the\n",
+ "eqcorrscan.utils.plotting module to help with this.\n",
+ "\n",
+ "We will use some relatively standard, but un-tuned parameters for this example.\n",
+ "You should spend some time deciding: these decisions strongly affect the quality\n",
+ "of your results. You can also set the minimum signal-to-noise ratio for traces\n",
+ "to be included in your templates. Again, this should be tested.\n",
+ "\n",
+ "The `process_len` parameter controls how much data will be processed at once.\n",
+ "Because EQocrrscan computes resampling in the frequency domain, and can compute\n",
+ "correlations in the frequency domain, changing this length between construction \n",
+ "and detection affects the accuracy of the Fourier transforms which affects the\n",
+ "final correlations. For this reason the `process_len` is maintained throughout\n",
+ "the workflow by EQcorrscan. Here we use one hour (3600 seconds), but it is common\n",
+ "to use one day (86400 seconds).\n",
+ "\n",
+ "You will note that we use the `from_client` method for construction: this is\n",
+ "because we have a `WaveBank` that emulates a client making this really simple."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "df11e06b7dce0cce"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-12-01 13:20:51,205\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.876741406237065 below threshold for KARZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,206\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.KARZ.10.EHZ\n",
+ "2023-12-01 13:20:51,208\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.9540223699044974 below threshold for OMRZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,209\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n",
+ "2023-12-01 13:20:51,274\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.5737322242199878 below threshold for LIRZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,275\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n",
+ "2023-12-01 13:20:51,277\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.8545790599562736 below threshold for MKRZ.EHN, not using\n",
+ "2023-12-01 13:20:51,278\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MKRZ.10.EHN\n",
+ "2023-12-01 13:20:51,280\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.5224417059935123 below threshold for OMRZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,280\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n",
+ "2023-12-01 13:20:51,283\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.9704676413162124 below threshold for TARZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,283\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n",
+ "2023-12-01 13:20:51,315\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.6693293207326403 below threshold for LIRZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,316\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n",
+ "2023-12-01 13:20:51,318\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.241518566731996 below threshold for OMRZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,319\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n",
+ "2023-12-01 13:20:51,322\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.657470431167782 below threshold for TARZ.EHZ, not using\n",
+ "2023-12-01 13:20:51,322\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n",
+ "2023-12-01 13:20:52,446\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.5290305215816998 below threshold for LIRZ.EHZ, not using\n",
+ "2023-12-01 13:20:52,446\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n",
+ "2023-12-01 13:20:52,448\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.0144665058782114 below threshold for TARZ.EHZ, not using\n",
+ "2023-12-01 13:20:52,448\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n",
+ "2023-12-01 13:20:53,107\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.400855581315436 below threshold for LIRZ.EHZ, not using\n",
+ "2023-12-01 13:20:53,108\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n",
+ "2023-12-01 13:20:53,109\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.3176851693587057 below threshold for MKRZ.EHZ, not using\n",
+ "2023-12-01 13:20:53,110\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MKRZ.10.EHZ\n",
+ "2023-12-01 13:20:53,112\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.3894705992621912 below threshold for OMRZ.EHE, not using\n",
+ "2023-12-01 13:20:53,112\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHE\n",
+ "2023-12-01 13:20:53,114\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.884291218472121 below threshold for OPRZ.HHZ, not using\n",
+ "2023-12-01 13:20:53,115\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHZ\n",
+ "2023-12-01 13:20:53,173\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.1825458574844174 below threshold for MARZ.EHN, not using\n",
+ "2023-12-01 13:20:53,174\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MARZ.10.EHN\n",
+ "2023-12-01 13:20:53,177\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.851222353660108 below threshold for OPRZ.HHN, not using\n",
+ "2023-12-01 13:20:53,178\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHN\n",
+ "2023-12-01 13:20:53,697\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.4135717198623312 below threshold for KARZ.EHZ, not using\n",
+ "2023-12-01 13:20:53,697\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.KARZ.10.EHZ\n",
+ "2023-12-01 13:20:53,699\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.3877145548728342 below threshold for LIRZ.EHE, not using\n",
+ "2023-12-01 13:20:53,699\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHE\n",
+ "2023-12-01 13:20:53,701\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.1370695659576338 below threshold for MARZ.EHZ, not using\n",
+ "2023-12-01 13:20:53,701\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MARZ.10.EHZ\n",
+ "2023-12-01 13:20:53,703\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.22436151966111806 below threshold for OMRZ.EHZ, not using\n",
+ "2023-12-01 13:20:53,704\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n",
+ "2023-12-01 13:20:53,705\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.487347592301636 below threshold for OPRZ.HHZ, not using\n",
+ "2023-12-01 13:20:53,706\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHZ\n",
+ "2023-12-01 13:20:57,350\teqcorrscan.core.match_filter.tribe\tERROR\tEmpty Template\n",
+ "2023-12-01 13:20:57,352\teqcorrscan.core.match_filter.tribe\tERROR\tEmpty Template\n"
+ ]
+ }
+ ],
+ "source": [
+ "from eqcorrscan import Tribe\n",
+ "\n",
+ "tribe = Tribe().construct(\n",
+ " method=\"from_client\",\n",
+ " client_id=bank,\n",
+ " catalog=cat,\n",
+ " lowcut=2.0,\n",
+ " highcut=15.0,\n",
+ " samp_rate=50.0,\n",
+ " filt_order=4,\n",
+ " length=3.0,\n",
+ " prepick=0.5,\n",
+ " swin=\"all\",\n",
+ " process_len=3600,\n",
+ " all_horix=True,\n",
+ " min_snr=4.0,\n",
+ " parallel=True\n",
+ ")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:20:57.362410907Z",
+ "start_time": "2023-12-01T00:20:49.826135441Z"
+ }
+ },
+ "id": "fb993ce01cf4377a"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "You should see an ERROR message about empty templates: some of the events in our catalog\n",
+ "do not have useful data in our wavebank. We might want to set a minimum number of stations\n",
+ "used for our templates to ensure that our templates are all of reasonable quality.\n",
+ "In this case we will only retain templates with at least five stations:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "474f94eb5efa59d5"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tribe of 33 templates\n"
+ ]
+ }
+ ],
+ "source": [
+ "tribe.templates = [t for t in tribe if len({tr.stats.station for tr in t.st}) >= 5]\n",
+ "print(tribe)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:20:59.634894692Z",
+ "start_time": "2023-12-01T00:20:59.629687355Z"
+ }
+ },
+ "id": "aa7ce144ced8823a"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Matched-filter detection\n",
+ "\n",
+ "Now that we have our tribe we can use it to detect new earthquakes. Again we\n",
+ "will make use of our local `WaveBank`. This is preferred to feeding one stream\n",
+ "of data to the code at a time for two reasons:\n",
+ "1. EQcorrscan will overlap your chunks of data (in this can every hour of data)\n",
+ " to ensure that all of the data have correlations from all channels after the\n",
+ " delay-and-stack correlation sums.\n",
+ "2. EQcorrscan can pre-emptively process the next chunks data in parallel while\n",
+ " computing detections in the current chunk. This can significantly speed up\n",
+ " processing, and makes better use of compute resources.\n",
+ "\n",
+ "\n",
+ "The main decisions that you have to make at this stage are around thresholds.\n",
+ "Generally it is better to start with a relatively low threshold: you can increase\n",
+ "the threshold later using the `Party.rethreshold` method, but you can't lower\n",
+ "it without re-running the whole detection workflow.\n",
+ "\n",
+ "It is common to use `MAD` thresholding, but you should experiment with your\n",
+ "dataset to see what works best."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "231442eea4647717"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-12-01 13:21:02,031\teqcorrscan.core.match_filter.tribe\tWARNING\tUsing concurrent_processing=True can be faster ifdownloading your data takes a long time. See https://github.com/eqcorrscan/EQcorrscan/pull/544for benchmarks.\n",
+ "2023-12-01 13:22:27,562\teqcorrscan.core.match_filter.helpers.tribe\tWARNING\tRemoved data for NZ.EDRZ.10.EHE NZ.EDRZ.10.EHN NZ.EDRZ.10.EHZ NZ.KARZ.10.EHE NZ.KARZ.10.EHN NZ.KARZ.10.EHZ NZ.LIRZ.10.EHE NZ.LIRZ.10.EHN NZ.LIRZ.10.EHZ NZ.MARZ.10.EHE NZ.MARZ.10.EHN NZ.MARZ.10.EHZ NZ.MKRZ.10.EHE NZ.MKRZ.10.EHN NZ.MKRZ.10.EHZ NZ.OMRZ.10.EHE NZ.OMRZ.10.EHN NZ.OMRZ.10.EHZ NZ.OPRZ.10.HHE NZ.OPRZ.10.HHN NZ.OPRZ.10.HHZ NZ.TARZ.10.EHN NZ.TARZ.10.EHZ due to less than 80% of the required length.\n",
+ "2023-12-01 13:22:27,563\teqcorrscan.core.match_filter.tribe\tWARNING\tNo suitable data between 2023-03-18T23:50:43.199856Z and 2023-03-19T00:51:03.199856Z, skipping\n"
+ ]
+ }
+ ],
+ "source": [
+ "party = tribe.client_detect(\n",
+ " client=bank,\n",
+ " starttime=starttime,\n",
+ " endtime=endtime,\n",
+ " threshold=10.0,\n",
+ " threshold_type=\"MAD\",\n",
+ " trig_int=1.0,\n",
+ ")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:22:27.605964306Z",
+ "start_time": "2023-12-01T00:21:02.016248197Z"
+ }
+ },
+ "id": "b7f7332ffa3ffde3"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "125\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(len(party))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-12-01T00:42:38.678673425Z",
+ "start_time": "2023-12-01T00:42:38.668416131Z"
+ }
+ },
+ "id": "45dd26bad62d8e17"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "