diff --git a/.coveragerc b/.coveragerc index 59cec5006..9ef2f6226 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,7 @@ [run] branch = True source = eqcorrscan +concurrency = multiprocessing,thread omit = eqcorrscan/__init__.py eqcorrscan/*/__init__.py diff --git a/.github/test_conda_env.yml b/.github/test_conda_env.yml index 8ff5c492f..31666f8a4 100644 --- a/.github/test_conda_env.yml +++ b/.github/test_conda_env.yml @@ -5,9 +5,9 @@ channels: dependencies: - numpy>=1.12 - matplotlib>=1.3.0 - - scipy>=0.18,<1.9.0 # Pinned due to scipy/obspy hanning renaming + - scipy - mock - - obspy>=1.3.0 + - obspy>=1.4.0 - h5py - pyyaml - bottleneck @@ -17,8 +17,6 @@ dependencies: - pytest-pep8 - pytest-xdist - pytest-rerunfailures - - pytest-mpl<0.16.0 + - pytest-mpl - codecov - - pip - - pip: - - pyfftw + - boto3 diff --git a/.github/test_conda_env_macOS.yml b/.github/test_conda_env_macOS.yml index 3282a48c6..62bc683cd 100644 --- a/.github/test_conda_env_macOS.yml +++ b/.github/test_conda_env_macOS.yml @@ -1,23 +1,28 @@ name: eqcorrscan-test channels: - conda-forge - - defaults dependencies: - - clangdev>=4 - - openmp>=4 - - libcxx>=4 - - cctools - - clang + # Compiler bits to match conda-forge build env + - cctools_osx-64 + - clang=14 + - clang-14 - clang_osx-64 + - llvm-openmp + - clangxx=14 - compiler-rt - - libcxx - - llvm-openmp>=4.0.1 + - compiler-rt_osx-64 + - ld64_osx-64 + - libclang-cpp14 + - libcxx=14 + - libllvm14 + - llvm-tools + # Normal requirements - numpy>=1.12 - matplotlib>=1.3.0 - - scipy>=0.18,<1.9.0 # Pinned due to scipy/obspy hanning renaming + - scipy - mock - - obspy>=1.3.0 - - h5py<3.2 # Issue with dep resolution: https://github.com/conda-forge/h5py-feedstock/issues/92 + - obspy>=1.4.0 + - h5py>3.3 # Issue with dep resolution: https://github.com/conda-forge/h5py-feedstock/issues/92 - pyyaml - bottleneck - fftw @@ -28,6 +33,4 @@ dependencies: - pytest-rerunfailures - pytest-mpl<0.16.0 - codecov - - pip - - pip: - - pyfftw + - boto3 diff --git a/.github/workflows/flake8-linter-matcher.json b/.github/workflows/flake8-linter-matcher.json new file mode 100644 index 000000000..58c5dabca --- /dev/null +++ b/.github/workflows/flake8-linter-matcher.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "flake8-linter-error", + "severity": "error", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+):\\s+([EWCNF]\\d+\\s+.+)$", + "file": 1, + "line": 2, + "column": 3, + "message": 4 + } + ] + } + ] +} diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml new file mode 100644 index 000000000..f2b1f9676 --- /dev/null +++ b/.github/workflows/flake8.yml @@ -0,0 +1,27 @@ +name: Flake8 Linter +on: + pull_request: +jobs: + flake8-linter: + runs-on: ubuntu-latest + steps: + - name: Checkout source + uses: actions/checkout@v3 + - name: Install Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install Flake8 5.0.4 linter + run: pip install flake8==5.0.4 # use this version for --diff option + - name: Setup Flake8 output matcher for PR annotations + run: echo '::add-matcher::.github/workflows/flake8-linter-matcher.json' + - name: Fetch pull request target branch + run: | + git remote add upstream https://github.com/eqcorrscan/EQcorrscan.git + git fetch upstream $GITHUB_BASE_REF + - name: Run Flake8 linter + run: git diff upstream/$GITHUB_BASE_REF HEAD | flake8 + --exclude eqcorrscan/doc + --ignore W605,W504,W503 + --max-line-length 80 + --diff diff --git a/.github/workflows/runtest.yml b/.github/workflows/runtest.yml index 
49408e66b..17337e5fa 100644 --- a/.github/workflows/runtest.yml +++ b/.github/workflows/runtest.yml @@ -8,12 +8,12 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.7', '3.8', '3.9'] + python-version: ['3.8', '3.9', '3.10', '3.11'] fail-fast: false # continue-on-error: true steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Get conda env file shell: bash -l {0} @@ -22,31 +22,22 @@ jobs: cp .github/test_conda_env_macOS.yml .github/test_conda_env.yml fi - - name: Cache conda - uses: actions/cache@v2 - env: - # Increase this value to reset cache if needed by env file has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('.github/test_conda_env.yml') }} - - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: - miniconda-version: 'latest' + miniforge-variant: Mambaforge + miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - environment-file: .github/test_conda_env.yml - condarc-file: .github/test_condarc.yml - use-only-tar-bz2: true # Must be set for caching to work properly + use-mamba: true + + - name: Update Env + run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install eqcorrscan shell: bash -l {0} run: | - pip install -e . + pip install -v -e . --no-deps - name: print package info shell: bash -l {0} @@ -65,7 +56,7 @@ jobs: - name: run main test suite shell: bash -l {0} run: | - py.test -n 2 -m "not serial and not network and not superslow" --cov-report=xml --dist loadscope + py.test -n 2 -m "not serial and not network and not superslow and not slow" --cov-report=xml --dist loadscope - name: run serial test if: always() @@ -81,7 +72,7 @@ jobs: py.test -v -m "slow and not serial and not network" --cov-report=xml --cov-append - name: upload coverage - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml @@ -95,37 +86,28 @@ jobs: runs-on: "ubuntu-latest" strategy: matrix: - python-version: [3.8] + python-version: ['3.10'] fail-fast: false steps: - - uses: actions/checkout@v2 - - - name: Cache conda - uses: actions/cache@v2 - env: - # Increase this value to reset cache if needed by env file has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('.github/test_conda_env.yml') }} + - uses: actions/checkout@v3 - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: - miniconda-version: 'latest' + miniforge-variant: Mambaforge + miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - environment-file: .github/test_conda_env.yml - condarc-file: .github/test_condarc.yml - use-only-tar-bz2: true # Must be set for caching to work properly + use-mamba: true + + - name: Update Env + run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install eqcorrscan shell: bash -l {0} run: | - pip install -e . + pip install -e . 
--no-deps - name: print package info shell: bash -l {0} @@ -136,7 +118,7 @@ jobs: - name: run network tests shell: bash -l {0} run: | - py.test -n 2 -m "network" --cov-report=xml + py.test -n 2 -m "network and not superslow" --cov-report=xml - name: run tutorials if: always() shell: bash -l {0} @@ -149,7 +131,7 @@ jobs: py.test -m "superslow" -s eqcorrscan/tests/tutorials_test.py eqcorrscan/tests/subspace_test.py --cov-report=xml --cov-append - name: upload coverage - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml @@ -162,32 +144,23 @@ jobs: runs-on: "ubuntu-latest" strategy: matrix: - python-version: [3.8] + python-version: ['3.9'] fail-fast: false steps: - - uses: actions/checkout@v2 - - - name: Cache conda - uses: actions/cache@v2 - env: - # Increase this value to reset cache if needed by env file has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: - ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('.github/test_conda_env.yml') }} - + - uses: actions/checkout@v3 + - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.1.1 with: - miniconda-version: 'latest' + miniforge-variant: Mambaforge + miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - environment-file: .github/test_conda_env.yml - condarc-file: .github/test_condarc.yml - use-only-tar-bz2: true # Must be set for caching to work properly + use-mamba: true + + - name: Update Env + run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install fmf shell: bash -l {0} @@ -195,7 +168,7 @@ jobs: cd .. git clone https://github.com/beridel/fast_matched_filter.git cd fast_matched_filter - pip install -e . + pip install -e . --no-deps cd ../EQcorrscan - name: install eqcorrscan @@ -216,7 +189,7 @@ jobs: py.test eqcorrscan/tests/correlate_test.py --cov-report=xml - name: upload coverage - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml diff --git a/.readthedocs.yml b/.readthedocs.yml index 63dcc2610..3cd235e87 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -16,7 +16,8 @@ sphinx: configuration: eqcorrscan/doc/conf.py # Optionally build your docs in additional formats such as PDF and ePub -formats: all +# See https://docs.readthedocs.io/en/stable/config-file/v2.html#formats +formats: [] # Optionally set the version of Python and requirements required to build your docs python: diff --git a/.stickler.yml b/.stickler.yml deleted file mode 100644 index 5e8537b49..000000000 --- a/.stickler.yml +++ /dev/null @@ -1,7 +0,0 @@ -linters: - flake8: - python: 3 - -files: - ignore: - - 'eqcorrscan/doc/*' diff --git a/CHANGES.md b/CHANGES.md index 74ec346bd..e49cdf69e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,49 @@ ## Current +* core.match_filter.tribe + - Significant re-write of detect logic to take advantage of parallel steps (see #544) + - Significant re-structure of hidden functions. +* core.match_filter.matched_filter + - 5x speed up for MAD threshold calculation with parallel (threaded) MAD + calculation (#531). +* core.match_filter.detect + - 1000x speedup for retrieving unique detections for all templates. 
+ - 30x speedup in handling detections (50x speedup in selecting detections, + 4x speedup in adding prepick time) +* core.match_filter.template + - new quick_group_templates function for 50x quicker template grouping. + - Templates with nan channels will be considered equal to other templates with shared + nan channels. + - New grouping strategy to minimise nan-channels - templates are grouped by + similar seed-ids. This should speed up both correlations and + prep_data_for_correlation. See PR #457. +* utils.pre_processing + - `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates + - New function `quick_trace_select` for a very efficient selection of trace + by seed ID without wildcards (4x speedup). + - `process`, `dayproc` and `shortproc` replaced by `multi_process`. Deprecation + warning added. + - `multi_process` implements multithreaded GIL-releasing parallelism of slow + sections (detrending, resampling and filtering) of the processing workflow. + Multiprocessing is no longer supported or needed for processing. See PR #540 + for benchmarks. New approach is slightly faster overall, and significantly + more memory efficient (uses c. 6x less memory than old multiprocessing approach + on a 12 core machine) +* utils.correlate + - 25 % speedup for `_get_array_dicts` with quicker access to properties. +* utils.catalog_to_dd + - _prepare_stream + - Now more consistently slices templates to length = extract_len * samp_rate + so that user receives fewer warnings about insufficient data. + - write_correlations + - New option `use_shared_memory` to speed up correlation of many events by + ca. 20 % by moving trace data into shared memory. + - Add ability to weight correlations by raw correlation rather than just + correlation squared. +* utils.cluster.decluster_distance_time + - Bug-fix: fix segmentation fault when declustering more than 46340 detections + with hypocentral_separation. + +## 0.4.4 * core.match_filter - Bug-fix: peak-cores could be defined twice in _group_detect through kwargs. Fix: only update peak_cores if it isn't there already. @@ -9,9 +54,17 @@ * core.lag_calc._xcorr_interp - CC-interpolation replaced with resampling (more robust), old method deprecated. Use new method with use_new_resamp_method=True as **kwarg. -* core.lag_calc: +* core.lag_calc + - Added new option all_vert to transfer P-picks to all channels defined as + vertical_chans. + - Made usage of all_vert, all_horiz consistent across the lag_calc. - Fixed bug where minimum CC defined via min_cc_from_mean_cc_factor was not set correctly for negative correlation sums. +* core.template_gen + - Added new option all_vert to transfer P-picks to all channels defined as + vertical_chans. + - Made handling of horizontal_chans and vertical_chans consistent so that user + can freely choose relevant channels. * utils.correlate - Fast Matched Filter now supported natively for version >= 1.4.0 - Only full correlation stacks are returned now (e.g. where fewer than than diff --git a/README.md b/README.md index 9021d6310..bce837bbb 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## A python package for the detection and analysis of repeating and near-repeating earthquakes. ## Citation: -We have a manuscript on the development of EQcorrscan, if you make use of EQcorrscan please cite the folloing paper: +We have a manuscript on the development of EQcorrscan, if you make use of EQcorrscan please cite the following paper: Chamberlain, C. J., Hopp, C. J., Boese, C. M., Warren-Smith, E., Chambers, D., Chu, S.
X., Michailos, K., Townend, J., [EQcorrscan: Repeating and near-repeating earthquake detection and analysis in Python.](https://pubs.geoscienceworld.org/ssa/srl/article/89/1/173/524875/eqcorrscan-repeating-and-near-repeating-earthquake) Seismological Research Letters *2017* @@ -89,3 +89,16 @@ Note that tests for travis and appveyor are run daily on master as cron jobs, an This package is written and maintained by the EQcorrscan developers, and is distributed under the LGPL GNU License, Copyright EQcorrscan developers 2018. + + +# Funding + +![RCET](eqcorrscan/doc/RCET_logo_transparent.png) + +Continued development of the EQcorrscan package is directly supported by the +[RCET](https://www.rcet.science/), Rapid Characterisation of Earthquakes and Tsunami +programme funded by the New Zealand Ministry of Business, Innovation and Employment +Endeavour fund. + +Development is indirectly funded by grants from [Toku Tū Ake: EQC](https://www.eqc.govt.nz/) +and a [Rutherford Discovery Fellowship](https://www.royalsociety.org.nz/what-we-do/funds-and-opportunities/rutherford-discovery-fellowships/rutherford-discovery-fellowship-recipients/calum-chamberlain/). diff --git a/conftest.py b/conftest.py index 0549ad04f..4da04679d 100644 --- a/conftest.py +++ b/conftest.py @@ -1,5 +1,6 @@ import os import shutil +import glob from os.path import join, dirname import pytest @@ -58,7 +59,8 @@ def clean_up_test_files(): 'dt.cc2', 'dt.ct', 'dt.ct2', - 'phase.dat' + 'phase.dat', + 'eqcorrscan_temporary_party.pkl' ] yield @@ -85,7 +87,11 @@ def clean_up_test_directories(): 'test_tar_write', 'tmp1', 'cc_exported', + '.streams', + '.parties' ] + directories_to_kill.extend(glob.glob(".template_db_*")) + directories_to_kill.extend(glob.glob(".streams_*")) yield @@ -93,6 +99,7 @@ def clean_up_test_directories(): for directory in directories_to_kill: if os.path.isdir(directory): try: + print(f"Removing directory {directory}") shutil.rmtree(directory) except Exception as e: print("Could not find directory, already cleaned?") diff --git a/eqcorrscan/core/lag_calc.py b/eqcorrscan/core/lag_calc.py index dd71efc36..09adb6740 100644 --- a/eqcorrscan/core/lag_calc.py +++ b/eqcorrscan/core/lag_calc.py @@ -24,6 +24,7 @@ from eqcorrscan.core.match_filter.family import Family from eqcorrscan.core.match_filter.template import Template from eqcorrscan.utils.plotting import plot_repicked +from eqcorrscan.utils.pre_processing import _stream_quick_select show_interp_deprec_warning = True @@ -168,7 +169,8 @@ def _concatenate_and_correlate(streams, template, cores): for i, chan in enumerate(chans): start_index = 0 for j, stream in enumerate(streams): - tr = stream.select(id=chan) + # tr = stream.select(id=chan) + tr = _stream_quick_select(stream, chan) if len(tr) == 0: # No data for this channel in this stream used_chans[j].append(UsedChannel( @@ -221,8 +223,9 @@ def _concatenate_and_correlate(streams, template, cores): def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4, min_cc_from_mean_cc_factor=None, + all_vert=False, all_horiz=False, vertical_chans=['Z'], horizontal_chans=['E', 'N', '1', '2'], - vertical_chans=['Z'], cores=1, interpolate=False, + cores=1, interpolate=False, plot=False, plotdir=None, export_cc=False, cc_dir=None, **kwargs): """ @@ -279,7 +282,9 @@ def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4, picked_dict = {} delta = family.template.st[0].stats.delta detect_streams_dict = _prepare_data( - family=family, detect_data=stream, shift_len=shift_len) + family=family, detect_data=stream, 
shift_len=shift_len, + all_vert=all_vert, all_horiz=all_horiz, vertical_chans=vertical_chans, + horizontal_chans=horizontal_chans) detection_ids = list(detect_streams_dict.keys()) detect_streams = [detect_streams_dict[detection_id] for detection_id in detection_ids] @@ -396,7 +401,9 @@ def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4, return picked_dict -def _prepare_data(family, detect_data, shift_len): +def _prepare_data(family, detect_data, shift_len, all_vert=False, + all_horiz=False, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2']): """ Prepare data for lag_calc - reduce memory here. @@ -423,7 +430,9 @@ def _prepare_data(family, detect_data, shift_len): "samples".format(length)) prepick = shift_len + family.template.prepick detect_streams_dict = family.extract_streams( - stream=detect_data, length=length, prepick=prepick) + stream=detect_data, length=length, prepick=prepick, + all_vert=all_vert, all_horiz=all_horiz, vertical_chans=vertical_chans, + horizontal_chans=horizontal_chans) for key, detect_stream in detect_streams_dict.items(): # Split to remove trailing or leading masks for i in range(len(detect_stream) - 1, -1, -1): @@ -451,6 +460,7 @@ def _prepare_data(family, detect_data, shift_len): def lag_calc(detections, detect_data, template_names, templates, shift_len=0.2, min_cc=0.4, min_cc_from_mean_cc_factor=None, + all_vert=False, all_horiz=False, horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'], cores=1, interpolate=False, plot=False, plotdir=None, export_cc=False, cc_dir=None, **kwargs): @@ -597,6 +607,7 @@ def lag_calc(detections, detect_data, template_names, templates, template_dict = xcorr_pick_family( family=family, stream=detect_data, min_cc=min_cc, min_cc_from_mean_cc_factor=min_cc_from_mean_cc_factor, + all_vert=all_vert, all_horiz=all_horiz, horizontal_chans=horizontal_chans, vertical_chans=vertical_chans, interpolate=interpolate, cores=cores, shift_len=shift_len, plot=plot, plotdir=plotdir, diff --git a/eqcorrscan/core/match_filter/detection.py b/eqcorrscan/core/match_filter/detection.py index 5fe689db3..9ac074ddb 100644 --- a/eqcorrscan/core/match_filter/detection.py +++ b/eqcorrscan/core/match_filter/detection.py @@ -23,6 +23,7 @@ Origin) from eqcorrscan.core.match_filter.helpers import _test_event_similarity +from eqcorrscan.utils.pre_processing import _stream_quick_select Logger = logging.getLogger(__name__) @@ -67,10 +68,11 @@ class Detection(object): :type id: str :param id: Identification for detection (should be unique). 
""" + _precision = 1e-5 # Used for warning about out of range correlations def __init__(self, template_name, detect_time, no_chans, detect_val, threshold, typeofdet, threshold_type, threshold_input, - chans=None, event=None, id=None): + chans=None, event=None, id=None, strict=True): """Main class of Detection.""" self.template_name = template_name self.detect_time = detect_time @@ -93,7 +95,13 @@ def __init__(self, template_name, detect_time, no_chans, detect_val, if event is not None: event.resource_id = self.id if self.typeofdet == 'corr': - assert abs(self.detect_val) <= self.no_chans + if abs(self.detect_val) > self.no_chans * (1 + self._precision): + msg = (f"Correlation detection at {self.detect_val} exceeds " + f"boundedness ({self.no_chans}") + if strict: + raise OverflowError(msg) + else: + Logger.error(msg) def __repr__(self): """Simple print.""" @@ -284,7 +292,8 @@ def _calculate_event(self, template=None, template_st=None, new_pick.phase_hint = template_pick[0].phase_hint else: # Multiple picks for this trace in template - similar_traces = template_st.select(id=tr.id) + # similar_traces = template_st.select(id=tr.id) + similar_traces = _stream_quick_select(template_st, tr.id) similar_traces.sort() _index = similar_traces.traces.index(tr) try: @@ -348,7 +357,9 @@ def _calculate_event(self, template=None, template_st=None, self.event = ev return self - def extract_stream(self, stream, length, prepick): + def extract_stream(self, stream, length, prepick, all_vert=False, + all_horiz=False, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2']): """ Extract a cut stream of a given length around the detection. @@ -374,7 +385,17 @@ def extract_stream(self, stream, length, prepick): pick = [ p for p in self.event.picks if p.waveform_id.station_code == station and - p.waveform_id.channel_code == channel] + p.waveform_id.channel_code[0:-1] == channel[0:-1]] + # Allow picks to be transferred to other vertical/horizontal chans + if all_vert and channel[-1] in vertical_chans: + pick = [p for p in pick + if p.waveform_id.channel_code[-1] in vertical_chans] + elif all_horiz and channel[-1] in horizontal_chans: + pick = [p for p in pick + if p.waveform_id.channel_code[-1] in horizontal_chans] + else: + pick = [p for p in pick + if p.waveform_id.channel_code == channel] if len(pick) == 0: Logger.info("No pick for {0}.{1}".format(station, channel)) continue diff --git a/eqcorrscan/core/match_filter/family.py b/eqcorrscan/core/match_filter/family.py index 332b02b8d..729a01355 100644 --- a/eqcorrscan/core/match_filter/family.py +++ b/eqcorrscan/core/match_filter/family.py @@ -18,14 +18,10 @@ import logging from obspy import UTCDateTime, Stream, Catalog -from obspy.core.event import ( - StationMagnitude, Magnitude, ResourceIdentifier, WaveformStreamID, - CreationInfo, StationMagnitudeContribution) -from eqcorrscan.core.match_filter.matched_filter import _group_process +from eqcorrscan.utils.pre_processing import _group_process from eqcorrscan.core.match_filter.detection import Detection, get_catalog from eqcorrscan.utils.plotting import cumulative_detections -from eqcorrscan.utils.mag_calc import relative_magnitude Logger = logging.getLogger(__name__) @@ -507,8 +503,8 @@ def write(self, filename, format='tar', overwrite=False): return def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, - min_cc_from_mean_cc_factor=None, - horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'], + min_cc_from_mean_cc_factor=None, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', 
'2'], cores=1, interpolate=False, plot=False, plotdir=None, parallel=True, process_cores=None, ignore_length=False, ignore_bad_data=False, export_cc=False, cc_dir=None, @@ -766,10 +762,17 @@ def _process_streams(self, stream, pre_processed, process_cores=1, template_stream = stream if not pre_processed: processed_streams = _group_process( - template_group=[self.template], cores=process_cores, - parallel=parallel, stream=template_stream.merge().copy(), - daylong=False, ignore_length=ignore_length, overlap=0.0, - ignore_bad_data=ignore_bad_data) + filt_order=self.template.filt_order, + highcut=self.template.highcut, + lowcut=self.template.lowcut, + samp_rate=self.template.samp_rate, + process_length=self.template.process_length, + parallel=parallel, + cores=process_cores, + stream=template_stream.merge().copy(), + daylong=False, + ignore_length=ignore_length, + overlap=0.0, ignore_bad_data=ignore_bad_data) processed_stream = Stream() for p in processed_streams: processed_stream += p @@ -779,7 +782,9 @@ def _process_streams(self, stream, pre_processed, process_cores=1, processed_stream = stream.merge() return processed_stream.split() - def extract_streams(self, stream, length, prepick): + def extract_streams(self, stream, length, prepick, all_vert=False, + all_horiz=False, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2']): """ Generate a dictionary of cut streams around detections. @@ -797,7 +802,9 @@ def extract_streams(self, stream, length, prepick): """ # Splitting and merging to remove trailing and leading masks return {d.id: d.extract_stream( - stream=stream, length=length, prepick=prepick).split().merge() + stream=stream, length=length, prepick=prepick, all_vert=all_vert, + all_horiz=all_horiz, vertical_chans=vertical_chans, + horizontal_chans=horizontal_chans).split().merge() for d in self.detections} diff --git a/eqcorrscan/core/match_filter/helpers/__init__.py b/eqcorrscan/core/match_filter/helpers/__init__.py new file mode 100644 index 000000000..f212f44c2 --- /dev/null +++ b/eqcorrscan/core/match_filter/helpers/__init__.py @@ -0,0 +1,13 @@ +""" + +""" + +from .archive_access import ( # noqa: F401 + _par_read, _resolved, _badpath, _badlink, _safemembers, + temporary_directory) +from .matched_filter import ( # noqa: F401 + _tr_spike_test, _spike_test, _total_microsec, _templates_match, + _test_event_similarity, _remove_duplicates, _moveout, _mad, + _pickle_stream, _unpickle_stream, extract_from_stream, + normxcorr2) +from .clients import (get_waveform_client) # noqa: F401 diff --git a/eqcorrscan/core/match_filter/helpers/archive_access.py b/eqcorrscan/core/match_filter/helpers/archive_access.py new file mode 100644 index 000000000..b0f1a01d1 --- /dev/null +++ b/eqcorrscan/core/match_filter/helpers/archive_access.py @@ -0,0 +1,128 @@ +""" +Helper functions for access to archives, e.g. Party, Tribe + +:copyright: + EQcorrscan developers. + +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" + +import contextlib +import os +import shutil +import tempfile +import tarfile +import logging + + +Logger = logging.getLogger(__name__) + + +@contextlib.contextmanager +def temporary_directory(): + """ make a temporary directory, yield its name, cleanup on exit """ + dir_name = tempfile.mkdtemp() + yield dir_name + if os.path.exists(dir_name): + shutil.rmtree(dir_name) + + +def _par_read(dirname, compressed=True): + """ + Internal function to read a formatted parameter file.
+ + :type dirname: str + :param dirname: Directory to read the parameter file from. + :type compressed: bool + :param compressed: Whether the directory is compressed or not. + """ + from eqcorrscan.core.match_filter.matched_filter import MatchFilterError + from eqcorrscan.core.match_filter.template import Template + + templates = [] + if compressed: + arc = tarfile.open(dirname, "r:*") + members = arc.getmembers() + _parfile = [member for member in members + if member.name.split(os.sep)[-1] == + 'template_parameters.csv'] + if len(_parfile) == 0: + arc.close() + raise MatchFilterError( + 'No template parameter file in archive') + parfile = arc.extractfile(_parfile[0]) + else: + parfile = open(dirname + '/' + 'template_parameters.csv', 'r') + for line in parfile: + t_in = Template() + for key_pair in line.rstrip().split(','): + if key_pair.split(':')[0].strip() == 'name': + t_in.__dict__[key_pair.split(':')[0].strip()] = \ + key_pair.split(':')[-1].strip() + elif key_pair.split(':')[0].strip() == 'filt_order': + try: + t_in.__dict__[key_pair.split(':')[0].strip()] = \ + int(key_pair.split(':')[-1]) + except ValueError: + pass + else: + try: + t_in.__dict__[key_pair.split(':')[0].strip()] = \ + float(key_pair.split(':')[-1]) + except ValueError: + pass + templates.append(t_in) + parfile.close() + if compressed: + arc.close() + return templates + + +def _resolved(x): + return os.path.realpath(os.path.abspath(x)) + + +def _badpath(path, base): + """ + joinpath will ignore base if path is absolute. + """ + return not _resolved(os.path.join(base, path)).startswith(base) + + +def _badlink(info, base): + """ + Links are interpreted relative to the directory containing the link + """ + tip = _resolved(os.path.join(base, os.path.dirname(info.name))) + return _badpath(info.linkname, base=tip) + + +def _safemembers(members): + """ + Check members of a tar archive for safety. + + Ensure that they do not contain paths or links outside of where we + need them - this would only happen if the archive wasn't made by + eqcorrscan. + + :type members: :class:`tarfile.TarFile` + :param members: an open tarfile. + """ + base = _resolved(".") + + for finfo in members: + assert not _badpath(finfo.name, base), \ + f"{finfo.name} is blocked (illegal path)" + if finfo.issym() or finfo.islnk(): + assert not _badlink(finfo, base), \ + f"{finfo.name} is blocked: Link to {finfo.linkname}" + else: + yield finfo + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/eqcorrscan/core/match_filter/helpers/clients.py b/eqcorrscan/core/match_filter/helpers/clients.py new file mode 100644 index 000000000..2360d7ae9 --- /dev/null +++ b/eqcorrscan/core/match_filter/helpers/clients.py @@ -0,0 +1,45 @@ +""" +Helper functions for seismic data clients. + +:copyright: + EQcorrscan developers. + +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" +import logging + +from obspy import Stream + + +Logger = logging.getLogger(__name__) + + +def get_waveform_client(waveform_client): + """ + Bind a `get_waveforms_bulk` method to client if it doesn't already have one + + :param waveform_client: Obspy client with a `get_waveforms` method + + :returns: waveform_client with `get_waveforms_bulk`. + """ + def _get_waveforms_bulk_naive(self, bulk_arg): + """ naive implementation of get_waveforms_bulk that uses iteration. 
""" + st = Stream() + for arg in bulk_arg: + st += self.get_waveforms(*arg) + return st + + # add waveform_bulk method dynamically if it doesn't exist already + if not hasattr(waveform_client, "get_waveforms_bulk"): + bound_method = _get_waveforms_bulk_naive.__get__(waveform_client) + setattr(waveform_client, "get_waveforms_bulk", bound_method) + + return waveform_client + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/eqcorrscan/core/match_filter/helpers.py b/eqcorrscan/core/match_filter/helpers/matched_filter.py similarity index 71% rename from eqcorrscan/core/match_filter/helpers.py rename to eqcorrscan/core/match_filter/helpers/matched_filter.py index ef8b4c319..26abd5e48 100644 --- a/eqcorrscan/core/match_filter/helpers.py +++ b/eqcorrscan/core/match_filter/helpers/matched_filter.py @@ -8,14 +8,14 @@ GNU Lesser General Public License, Version 3 (https://www.gnu.org/copyleft/lesser.html) """ -import contextlib + import os -import shutil -import tarfile -import tempfile import logging +import pickle import numpy as np + +from concurrent.futures import ThreadPoolExecutor from obspy import Stream from obspy.core.event import Event @@ -25,36 +25,13 @@ Logger = logging.getLogger(__name__) -@contextlib.contextmanager -def temporary_directory(): - """ make a temporary directory, yeild its name, cleanup on exit """ - dir_name = tempfile.mkdtemp() - yield dir_name - if os.path.exists(dir_name): - shutil.rmtree(dir_name) - - -def get_waveform_client(waveform_client): - """ - Bind a `get_waveforms_bulk` method to client if it doesn't already have one - - :param waveform_client: Obspy client with a `get_waveforms` method - - :returns: waveform_client with `get_waveforms_bulk`. - """ - def _get_waveforms_bulk_naive(self, bulk_arg): - """ naive implementation of get_waveforms_bulk that uses iteration. 
""" - st = Stream() - for arg in bulk_arg: - st += self.get_waveforms(*arg) - return st - - # add waveform_bulk method dynamically if it doesn't exist already - if not hasattr(waveform_client, "get_waveforms_bulk"): - bound_method = _get_waveforms_bulk_naive.__get__(waveform_client) - setattr(waveform_client, "get_waveforms_bulk", bound_method) - - return waveform_client +def _tr_spike_test(data, percent, multiplier): + data_len = data.shape[0] + thresh = 2 * np.max(np.sort( + np.abs(data))[0:np.int64(percent * data_len)]) * multiplier + if (data > thresh).sum() > 0: + return True + return False def _spike_test(stream, percent=0.99, multiplier=1e7): @@ -70,13 +47,15 @@ def _spike_test(stream, percent=0.99, multiplier=1e7): """ from eqcorrscan.core.match_filter.matched_filter import MatchFilterError + to_check = ((tr.data, percent, multiplier) for tr in stream) list_ids = [] - for tr in stream: - if (tr.data > 2 * np.max(np.sort( - np.abs(tr.data))[0:int(percent * len(tr.data))] - ) * multiplier).sum() > 0: - list_ids.append(tr.id) - if list_ids != []: + with ThreadPoolExecutor() as executor: + for tr, spiked in zip(stream, executor.map( + lambda args: _tr_spike_test(*args), to_check)): + if spiked: + list_ids.append(tr.id) + + if len(list_ids): ids = ', '.join(list_ids) msg = ('Spikes above ' + str(multiplier) + ' of the range of ' + str(percent) + @@ -245,112 +224,71 @@ def _test_event_similarity(event_1, event_2, verbose=False, shallow=False): ' %s' % (amp_1[key], amp_2[key], key)) return False elif key == "waveform_id": - if pick_1[key].station_code != pick_2[key].station_code: + if amp_1[key].station_code != amp_2[key].station_code: if verbose: print('Station codes do not match') return False - if pick_1[key].channel_code[0] != pick_2[key].channel_code[0]: + if amp_1[key].channel_code[0] != amp_2[key].channel_code[0]: if verbose: print('Channel codes do not match') return False - if pick_1[key].channel_code[-1] != \ - pick_2[key].channel_code[-1]: + if amp_1[key].channel_code[-1] != \ + amp_2[key].channel_code[-1]: if verbose: print('Channel codes do not match') return False return True -def _par_read(dirname, compressed=True): - """ - Internal write function to read a formatted parameter file. - - :type dirname: str - :param dirname: Directory to read the parameter file from. - :type compressed: bool - :param compressed: Whether the directory is compressed or not. 
- """ - from eqcorrscan.core.match_filter.matched_filter import MatchFilterError - from eqcorrscan.core.match_filter.template import Template - - templates = [] - if compressed: - arc = tarfile.open(dirname, "r:*") - members = arc.getmembers() - _parfile = [member for member in members - if member.name.split(os.sep)[-1] == - 'template_parameters.csv'] - if len(_parfile) == 0: - arc.close() - raise MatchFilterError( - 'No template parameter file in archive') - parfile = arc.extractfile(_parfile[0]) - else: - parfile = open(dirname + '/' + 'template_parameters.csv', 'r') - for line in parfile: - t_in = Template() - for key_pair in line.rstrip().split(','): - if key_pair.split(':')[0].strip() == 'name': - t_in.__dict__[key_pair.split(':')[0].strip()] = \ - key_pair.split(':')[-1].strip() - elif key_pair.split(':')[0].strip() == 'filt_order': - try: - t_in.__dict__[key_pair.split(':')[0].strip()] = \ - int(key_pair.split(':')[-1]) - except ValueError: - pass - else: - try: - t_in.__dict__[key_pair.split(':')[0].strip()] = \ - float(key_pair.split(':')[-1]) - except ValueError: - pass - templates.append(t_in) - parfile.close() - if compressed: - arc.close() - return templates - - -def _resolved(x): - return os.path.realpath(os.path.abspath(x)) - - -def _badpath(path, base): - """ - joinpath will ignore base if path is absolute. - """ - return not _resolved(os.path.join(base, path)).startswith(base) - - -def _badlink(info, base): +def _remove_duplicates(party): + for family in party: + if family is not None: + # Slow uniq: + # family.detections = family._uniq().detections + # Very quick uniq: + det_tuples = [ + (det.id, str(det.detect_time), det.detect_val) + for det in family] + # Retrieve the indices for the first occurrence of each + # detection in the family (so only unique detections will + # remain). + uniq_det_tuples, uniq_det_indices = np.unique( + det_tuples, return_index=True, axis=0) + uniq_detections = [] + for uniq_det_index in uniq_det_indices: + uniq_detections.append(family[uniq_det_index]) + family.detections = uniq_detections + return party + + +def _moveout(st: Stream) -> float: + """ Maximum moveout across template in seconds. """ + return max(tr.stats.starttime for tr in st) - min( + tr.stats.starttime for tr in st) + + +def _mad(cccsum): """ - Links are interpreted relative to the directory containing the link + Internal helper to compute MAD-thresholds in parallel. """ - tip = _resolved(os.path.join(base, os.path.dirname(info.name))) - return _badpath(info.linkname, base=tip) + return np.median(np.abs(cccsum)) -def _safemembers(members): - """Check members of a tar archive for safety. - Ensure that they do not contain paths or links outside of where we - need them - this would only happen if the archive wasn't made by - eqcorrscan. +def _pickle_stream(stream: Stream, filename: str): + Logger.info(f"Pickling stream of {len(stream)} traces to {filename}") + with open(filename, "wb") as f: + pickle.dump(stream, f) + Logger.info(f"Pickled to {filename}") + return - :type members: :class:`tarfile.TarFile` - :param members: an open tarfile. 
- """ - base = _resolved(".") - for finfo in members: - if _badpath(finfo.name, base): - print(finfo.name, "is blocked (illegal path)") - elif finfo.issym() and _badlink(finfo, base): - print(finfo.name, "is blocked: Hard link to", finfo.linkname) - elif finfo.islnk() and _badlink(finfo, base): - print(finfo.name, "is blocked: Symlink to", finfo.linkname) - else: - yield finfo +def _unpickle_stream(filename: str) -> Stream: + Logger.info(f"Unpickling from {filename}") + with open(filename, "rb") as f: + stream = pickle.load(f) + assert isinstance(stream, Stream) + Logger.info(f"Unpickled stream of {len(stream)} traces from {filename}") + return stream def extract_from_stream(stream, detections, pad=5.0, length=30.0): @@ -413,7 +351,8 @@ def normxcorr2(template, image): """ array_xcorr = get_array_xcorr() # Check that we have been passed numpy arrays - if type(template) != np.ndarray or type(image) != np.ndarray: + if (not isinstance(template, np.ndarray) + or not isinstance(image, np.ndarray)): Logger.error( 'You have not provided numpy arrays, I will not convert them') return 'NaN' diff --git a/eqcorrscan/core/match_filter/helpers/processes.py b/eqcorrscan/core/match_filter/helpers/processes.py new file mode 100644 index 000000000..7a85945c0 --- /dev/null +++ b/eqcorrscan/core/match_filter/helpers/processes.py @@ -0,0 +1,708 @@ +""" +Functions for network matched-filter detection of seismic data. + +Designed to cross-correlate templates generated by template_gen function +with data and output the detections. + +:copyright: + EQcorrscan developers. + +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" +import os +import tempfile +import time +import traceback +import logging +import numpy as np + +from typing import List, Union, Iterable +from timeit import default_timer + +from multiprocessing import Queue +from queue import Empty + +from obspy import Stream + +from eqcorrscan.core.match_filter.helpers import ( + _pickle_stream, _unpickle_stream) +from eqcorrscan.core.match_filter.helpers.tribe import ( + _download_st, _pre_process, _group, _detect, + _read_template_db, _make_party) + +from eqcorrscan.utils.correlate import ( + _get_array_dicts, _fmf_stabilisation, _fmf_reshape) +from eqcorrscan.utils.pre_processing import ( + _quick_copy_stream, _prep_data_for_correlation) + + +Logger = logging.getLogger(__name__) + + +############################################################################### +# Process handlers +############################################################################### + + +class Poison(Exception): + """ + Exception passing within EQcorrscan + + :type value: Exception + :param value: Exception to pass between processes + """ + def __init__(self, value): + """ + Poison Exception. + """ + self.value = value + + def __repr__(self): + return f"Poison({self.value.__repr__()})" + + def __str__(self): + """ + >>> print(Poison(Exception('alf'))) + Poison(Exception('alf')) + """ + return self.__repr__() + + +def _get_and_check(input_queue: Queue, poison_queue: Queue, step: float = 0.5): + """ + Get from a queue and check for poison - returns Poisoned if poisoned. + + :param input_queue: Queue to get something from + :param poison_queue: Queue to check for poison + + :return: Item from queue or Poison. 
+ """ + while True: + poison = _check_for_poison(poison_queue) + if poison: + return poison + if input_queue.empty(): + time.sleep(step) + else: + return input_queue.get_nowait() + + +def _check_for_poison(poison_queue: Queue) -> Union[Poison, None]: + """ + Check if poison has been added to the queue. + """ + Logger.debug("Checking for poison") + try: + poison = poison_queue.get_nowait() + except Empty: + return + # Put the poison back in the queue for another process to check on + Logger.error("Poisoned") + poison_queue.put(poison) + return Poison(poison) + + +def _wait_on_output_to_be_available( + poison_queue: Queue, + output_queue: Queue, + raise_exception: bool = False, + item=None, + wait_warning: float = 60, +) -> Union[Poison, None]: + """ + Wait until the output queue is not full to put something in it. + + :param poison_queue: Queue to put or containing poison + :param output_queue: + Output Queue to check whether we can put something in it + :param item: Thing to put in the queue when we can + :param raise_exception: + Whether to raise an exception on poison (True), or pass back (False) + + :return: Poison if poisoned, or None if all okay + + .. rubric:: Example + + >>> from multiprocessing import Queue, Process + >>> poison_queue, output_queue = Queue(), Queue(maxsize=1) + >>> output_queue.put("Stopper") + >>> process = Process( + ... target=_wait_on_output_to_be_available, + ... kwargs={"poison_queue": poison_queue, + ... "output_queue": output_queue, + ... "raise_exception": True, + ... "item": "Carry on", + ... "wait_warning": 5}) + >>> process.start() + >>> time.sleep(7) + >>> poison_queue.put( + ... Poison(Exception("cyanide"))) + >>> time.sleep(7) + >>> process.is_alive() + False + >>> process.join() + """ + killed = _check_for_poison(poison_queue) + # Wait until output queue is empty to limit rate and memory use + tic = default_timer() + while output_queue.full(): + # Keep on checking while we wait + killed = _check_for_poison(poison_queue) + if killed: + break + waited = default_timer() - tic + if waited > wait_warning: + Logger.debug("Waiting for output_queue to not be full") + tic = default_timer() + if not killed and item: + output_queue.put_nowait(item) + elif killed and raise_exception: + raise killed + return killed + + +def _get_detection_stream( + template_channel_ids: List[tuple], + client, + input_time_queue: Queue, + retries: int, + min_gap: float, + buff: float, + output_filename_queue: Queue, + poison_queue: Queue, + temp_stream_dir: str, + full_stream_dir: str = None, + pre_process: bool = False, + parallel_process: bool = True, + process_cores: int = None, + daylong: bool = False, + overlap: Union[str, float] = "calculate", + ignore_length: bool = False, + ignore_bad_data: bool = False, + filt_order: int = None, + highcut: float = None, + lowcut: float = None, + samp_rate: float = None, + process_length: float = None, +): + """ + Get a stream to be used for detection from a client for a time period. + + This function is designed to be run continuously within a Process and will + only stop when the next item in the input_time_queue is None. + + This function uses .get_waveforms_bulk to get a Stream from a Client. + The specific time period to get data for is read from the input_time_queue. + Once the data have been loaded into memory from the Client, this function + then processes that Stream according to the processing parameters passed + as arguments to this function. 
Finally, this function writes the processed + Stream to disk in a temporary stream directory and puts the filename + for that stream in the output_filename_queue. + + Optionally, an unprocessed version of the stream can be written to the + full_stream_dir directory to later provide an unprocessed copy of the + raw data. This can be helpful if data downloading is slow and the stream + is required for subsequent processing. + + :param template_channel_ids: + Iterable of (network, station, location, channel) tuples to get data + for. Wildcards may be used if accepted by the client. + :param client: + Client-like object with at least a .get_waveforms_bulk method. + :param input_time_queue: + Queue of (starttime, endtime) tuples of UTCDateTimes to get data + between. + :param retries: See core.match_filter.tribe.client_detect + :param min_gap: See core.match_filter.tribe.client_detect + :param buff: + Length to pad downloaded data by - some clients do not provide all + data requested. + :param output_filename_queue: + Queue to put filenames of written streams into + :param poison_queue: + Queue to check for poison, or put poison into if something goes awry + :param temp_stream_dir: + Directory to write processed streams to. + :param full_stream_dir: + Directory to save unprocessed streams to. If None, will not be used. + :param pre_process: Whether to run pre-processing or not. + :param parallel_process: + Whether to process data in parallel (uses multi-threading) + :param process_cores: + Maximum number of cores to use for parallel processing + :param daylong: See utils.pre_processing.multi_process + :param overlap: See core.match_filter.tribe.detect + :param ignore_length: See utils.pre_processing.multi_process + :param ignore_bad_data: See utils.pre_processing.multi_process + :param filt_order: See utils.pre_processing.multi_process + :param highcut: See utils.pre_processing.multi_process + :param lowcut: See utils.pre_processing.multi_process + :param samp_rate: See utils.pre_processing.multi_process + :param process_length: See utils.pre_processing.multi_process + """ + while True: + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, output_queue=output_filename_queue, + item=False) + if killed: + Logger.error("Killed") + break + try: + next_times = _get_and_check(input_time_queue, poison_queue) + if next_times is None: + break + if isinstance(next_times, Poison): + Logger.error("Killed") + break + starttime, endtime = next_times + + st = _download_st( + starttime=starttime, endtime=endtime, buff=buff, + min_gap=min_gap, template_channel_ids=template_channel_ids, + client=client, retries=retries) + if len(st) == 0: + Logger.warning(f"No suitable data between {starttime} " + f"and {endtime}, skipping") + continue + Logger.info(f"Downloaded stream of {len(st)} traces:") + for tr in st: + Logger.info(tr) + # Try to reduce memory consumption by getting rid of st if we can + if full_stream_dir: + for tr in st: + tr.split().write(os.path.join( + full_stream_dir, + f"full_trace_{tr.id}_" + f"{tr.stats.starttime.strftime('%y-%m-%dT%H-%M-%S')}" + f".ms"), format="MSEED") + if not pre_process: + st_chunks = [st] + else: + template_ids = set(['.'.join(sid) + for sid in template_channel_ids]) + # Group_process copies the stream. 
+ st_chunks = _pre_process( + st=st, template_ids=template_ids, pre_processed=False, + filt_order=filt_order, highcut=highcut, + lowcut=lowcut, samp_rate=samp_rate, + process_length=process_length, + parallel=parallel_process, cores=process_cores, + daylong=daylong, ignore_length=ignore_length, + overlap=overlap, ignore_bad_data=ignore_bad_data) + # We don't need to hold on to st! + del st + for chunk in st_chunks: + Logger.info(f"After processing stream has {len(chunk)} traces:") + for tr in chunk: + Logger.info(tr) + if not os.path.isdir(temp_stream_dir): + os.makedirs(temp_stream_dir) + chunk_file = os.path.join( + temp_stream_dir, + f"chunk_{len(chunk)}_" + f"{chunk[0].stats.starttime.strftime('%Y-%m-%dT%H-%M-%S')}" + f"_{os.getpid()}.pkl") + # Add PID to cope with multiple instances operating at once + _pickle_stream(chunk, chunk_file) + # Wait for output queue to be ready + _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_filename_queue, + item=chunk_file, raise_exception=True) + del chunk + except Exception as e: + Logger.error(f"Caught exception {e} in downloader") + poison_queue.put(Poison(e)) + traceback.print_tb(e.__traceback__) + break + # Wait for output queue to be ready + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_filename_queue, + raise_exception=False) + if killed: + poison_queue.put_nowait(killed) + else: + output_filename_queue.put(None) + return + + +def _pre_processor( + input_stream_queue: Queue, + temp_stream_dir: str, + template_ids: set, + pre_processed: bool, + filt_order: int, + highcut: float, + lowcut: float, + samp_rate: float, + process_length: float, + parallel: bool, + cores: int, + daylong: bool, + ignore_length: bool, + overlap: float, + ignore_bad_data: bool, + output_filename_queue: Queue, + poison_queue: Queue, +): + """ + Consume a queue of input streams and process those streams. + + This function is designed to be run continuously within a Process and will + only stop when the next item in the input_stream_queue is None. + + This function consumes streams from the input_stream_queue and processes + them using utils.pre_processing functions. Processed streams are written + out to the temp_stream_dir and the filenames are produced in the + output_filename_queue. + + :param input_stream_queue: + Input Queue to consume streams from. + :param temp_stream_dir: Directory to write processed streams to. + :param template_ids: + Iterable of seed ids in the template set. Only channels matching these + seed ids will be retained. 
+ :param pre_processed: See core.match_filter.tribe.detect + :param filt_order: See utils.pre_processing.multi_process + :param highcut: See utils.pre_processing.multi_process + :param lowcut: See utils.pre_processing.multi_process + :param samp_rate: See utils.pre_processing.multi_process + :param process_length: See utils.pre_processing.multi_process + :param parallel: See utils.pre_processing.multi_process + :param cores: See utils.pre_processing.multi_process + :param daylong: See utils.pre_processing.multi_process + :param ignore_length: See utils.pre_processing.multi_process + :param overlap: See core.match_filter.tribe.detect + :param ignore_bad_data: See utils.pre_processing.multi_process + :param output_filename_queue: + Queue to put filenames of written streams into + :param poison_queue: + Queue to check for poison, or put poison into if something goes awry + """ + while True: + killed = _check_for_poison(poison_queue) + if killed: + break + Logger.debug("Getting stream from queue") + st = _get_and_check(input_stream_queue, poison_queue) + if st is None: + Logger.info("Ran out of streams, stopping processing") + break + elif isinstance(st, Poison): + Logger.error("Killed") + break + if len(st) == 0: + break + Logger.info(f"Processing stream:\n{st}") + + # Process stream + try: + st_chunks = _pre_process( + st, template_ids, pre_processed, filt_order, highcut, lowcut, + samp_rate, process_length, parallel, cores, daylong, + ignore_length, ignore_bad_data, overlap) + for chunk in st_chunks: + if not os.path.isdir(temp_stream_dir): + os.makedirs(temp_stream_dir) + chunk_file = os.path.join( + temp_stream_dir, + f"chunk_{len(chunk)}_" + f"{chunk[0].stats.starttime.strftime('%Y-%m-%dT%H-%M-%S')}" + f"_{os.getpid()}.pkl") + # Add PID to cope with multiple instances operating at once + _pickle_stream(chunk, chunk_file) + # Wait for output queue to be ready + _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_filename_queue, + item=chunk_file, raise_exception=True) + del chunk + except Exception as e: + Logger.error( + f"Caught exception in processor:\n {e}") + poison_queue.put_nowait(Poison(e)) + traceback.print_tb(e.__traceback__) + # Wait for output queue to be ready + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_filename_queue, + raise_exception=False) + if killed: + poison_queue.put_nowait(killed) + else: + output_filename_queue.put_nowait(None) + return + + +def _prepper( + input_stream_filename_queue: Queue, + templates: Union[List, dict], + group_size: int, + groups: Iterable[Iterable[str]], + output_queue: Queue, + poison_queue: Queue, + xcorr_func: str = None, +): + """ + Prepare templates and stream for correlation. + + This function is designed to be run continuously within a Process and will + only stop when the next item in the input_stream_queue is None. + + This function prepares (reshapes into numpy arrays) templates and streams + and ensures that the data are suitable for the cross-correlation function + specified. + + :param input_stream_filename_queue: + Input Queue to consume stream_filenames from. + :param templates: + Either (a) a list of Template objects, or (b) a dictionary of pickled + template filenames, keyed by template name. + :param group_size: + See core.match_filter.tribe.detect + :param groups: + Iterable of groups, where each group is an iterable of the template + names in that group. + :param output_queue: + Queue to produce inputs for correlation to. 
+ :param poison_queue: + Queue to check for poison, or put poison into if something goes awry + :param xcorr_func: + Name of correlation function backend to be used. + """ + if isinstance(templates, dict): + # We have been passed a db of template files on disk + Logger.info("Deserializing templates from disk") + try: + templates = _read_template_db(templates) + except Exception as e: + Logger.error(f"Could not read from db due to {e}") + poison_queue.put_nowait(Poison(e)) + return + + while True: + killed = _check_for_poison(poison_queue) + if killed: + Logger.info("Killed in prepper") + break + Logger.info("Getting stream from queue") + st_file = _get_and_check(input_stream_filename_queue, poison_queue) + if st_file is None: + Logger.info("Got None for stream, prepper complete") + break + elif isinstance(st_file, Poison): + Logger.error("Killed") + break + if isinstance(st_file, Stream): + Logger.info("Stream provided") + st = st_file + # Write temporary cache of file + st_file = tempfile.NamedTemporaryFile().name + Logger.info(f"Writing temporary stream file to {st_file}") + try: + _pickle_stream(st, st_file) + except Exception as e: + Logger.error( + f"Could not write temporary file {st_file} due to {e}") + poison_queue.put_nowait(Poison(e)) + break + Logger.info(f"Reading stream from {st_file}") + try: + st = _unpickle_stream(st_file) + except Exception as e: + Logger.error(f"Error reading {st_file}: {e}") + poison_queue.put_nowait(Poison(e)) + break + st_sids = {tr.id for tr in st} + if len(st_sids) < len(st): + _sids = [tr.id for tr in st] + _duplicate_sids = { + sid for sid in st_sids if _sids.count(sid) > 1} + poison_queue.put_nowait(Poison(NotImplementedError( + f"Multiple channels in continuous data for " + f"{', '.join(_duplicate_sids)}"))) + break + # Do the grouping for this stream + Logger.info(f"Grouping {len(templates)} templates into groups " + f"of {group_size} templates") + try: + template_groups = _group(sids=st_sids, templates=templates, + group_size=group_size, groups=groups) + except Exception as e: + Logger.error(e) + poison_queue.put_nowait(Poison(e)) + break + Logger.info(f"Grouped into {len(template_groups)} groups") + for i, template_group in enumerate(template_groups): + killed = _check_for_poison(poison_queue) + if killed: + break + try: + template_streams = [ + _quick_copy_stream(t.st) for t in template_group] + template_names = [t.name for t in template_group] + + # template_names, templates = zip(*template_group) + Logger.info( + f"Prepping {len(template_streams)} " + f"templates for correlation") + # We can just load in a fresh copy of the stream! 
+ _st, template_streams, template_names = \ + _prep_data_for_correlation( + stream=_unpickle_stream(st_file).merge(), + templates=template_streams, + template_names=template_names) + if len(_st) == 0: + Logger.error( + f"No traces returned from correlation prep: {_st}") + continue + starttime = _st[0].stats.starttime + + if xcorr_func in (None, "fmf", "fftw"): + array_dict_tuple = _get_array_dicts( + template_streams, _st, stack=True) + stream_dict, template_dict, pad_dict, \ + seed_ids = array_dict_tuple + if xcorr_func == "fmf": + Logger.info("Prepping data for FMF") + # Work out used channels here + tr_chans = np.array( + [~np.isnan(template_dict[seed_id]).any(axis=1) + for seed_id in seed_ids]) + no_chans = np.sum(np.array(tr_chans).astype(int), + axis=0) + chans = [[] for _i in range(len(templates))] + for seed_id, tr_chan in zip(seed_ids, tr_chans): + for chan, state in zip(chans, tr_chan): + if state: + chan.append((seed_id.split('.')[1], + seed_id.split('.')[-1].split( + '_')[0])) + # Reshape + t_arr, d_arr, weights, pads = _fmf_reshape( + template_dict=template_dict, + stream_dict=stream_dict, + pad_dict=pad_dict, seed_ids=seed_ids) + # Stabilise + t_arr, d_arr, multipliers = _fmf_stabilisation( + template_arr=t_arr, data_arr=d_arr) + # Wait for output queue to be ready + _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_queue, + item=(starttime, i, d_arr, template_names, t_arr, + weights, pads, chans, no_chans), + raise_exception=True) + else: + Logger.info("Prepping data for FFTW") + # Wait for output queue to be ready + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, + output_queue=output_queue, + item=(starttime, i, stream_dict, template_names, + template_dict, pad_dict, seed_ids), + raise_exception=True) + else: + Logger.info("Prepping data for standard correlation") + # Wait for output queue to be ready + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, output_queue=output_queue, + item=(starttime, i, _st, template_names, + template_streams), + raise_exception=True) + except Exception as e: + Logger.error(f"Caught exception in Prepper: {e}") + traceback.print_tb(e.__traceback__) + poison_queue.put_nowait(Poison(e)) + i += 1 + Logger.info(f"Removing temporary {st_file}") + os.remove(st_file) + # Wait for output queue to be ready + killed = _wait_on_output_to_be_available( + poison_queue=poison_queue, output_queue=output_queue, + raise_exception=False) + if killed: + poison_queue.put_nowait(killed) + else: + output_queue.put_nowait(None) + return + + +def _make_detections( + input_queue: Queue, + delta: float, + templates: Union[List, dict], + threshold: float, + threshold_type: str, + save_progress: bool, + output_queue: Queue, + poison_queue: Queue, +): + """ + Construct Detection objects from sparse detection information. + + This function is designed to be run continuously within a Process and will + only stop when the next item in the input_queue is None. + + :param input_queue: + Queue of (starttime, peaks, thresholds, no_channels, channels, + template_names). Detections are made within `peaks`. + :param delta: + Sample rate of peaks to detect within in Hz + :param templates: + Template objects included in input_queue + :param threshold: + Overall threshold + :param threshold_type: + Overall threshold type + :param save_progress: + Whether to save progress or not: If true, individual Party files will + be written each time this is run. + :param output_queue: + Queue of output Party filenames. 
+ :param poison_queue: + Queue to check for poison, or put poison into if something goes awry + """ + chunk_id = 0 + while True: + killed = _check_for_poison(poison_queue) + if killed: + break + try: + next_item = _get_and_check(input_queue, poison_queue) + if next_item is None: + Logger.info("_make_detections got None, stopping") + break + elif isinstance(next_item, Poison): + Logger.error("Killed") + break + starttime, all_peaks, thresholds, no_chans, \ + chans, template_names = next_item + detections = _detect( + template_names=template_names, all_peaks=all_peaks, + starttime=starttime, delta=delta, no_chans=no_chans, + chans=chans, thresholds=thresholds) + Logger.info(f"Built {len(detections)}") + chunk_file = _make_party( + detections=detections, threshold=threshold, + threshold_type=threshold_type, templates=templates, + chunk_start=starttime, chunk_id=chunk_id, + save_progress=save_progress) + chunk_id += 1 + output_queue.put_nowait(chunk_file) + except Exception as e: + Logger.error( + f"Caught exception in detector:\n {e}") + traceback.print_tb(e.__traceback__) + poison_queue.put_nowait(Poison(e)) + output_queue.put_nowait(None) + return + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/eqcorrscan/core/match_filter/helpers/tribe.py b/eqcorrscan/core/match_filter/helpers/tribe.py new file mode 100644 index 000000000..9786a68fc --- /dev/null +++ b/eqcorrscan/core/match_filter/helpers/tribe.py @@ -0,0 +1,693 @@ +""" +Functions for network matched-filter detection of seismic data. + +Designed to cross-correlate templates generated by template_gen function +with data and output the detections. + +:copyright: + EQcorrscan developers. + +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" +import os +import pickle +import logging +import numpy as np + +from collections import defaultdict +from typing import List, Set +from timeit import default_timer + +from concurrent.futures import ThreadPoolExecutor + +from obspy import Stream, UTCDateTime + +from eqcorrscan.core.match_filter.template import ( + Template, group_templates_by_seedid) +from eqcorrscan.core.match_filter.detection import Detection +from eqcorrscan.core.match_filter.party import Party +from eqcorrscan.core.match_filter.family import Family +from eqcorrscan.core.match_filter.helpers import _spike_test, _mad +from eqcorrscan.core.match_filter.matched_filter import MatchFilterError + +from eqcorrscan.utils.correlate import ( + get_stream_xcorr, _stabalised_fmf, fftw_multi_normxcorr, + _zero_invalid_correlation_sums, _set_inner_outer_threading) +from eqcorrscan.utils.pre_processing import ( + _check_daylong, _group_process) +from eqcorrscan.utils.findpeaks import multi_find_peaks +from eqcorrscan.utils.plotting import _match_filter_plot + +Logger = logging.getLogger(__name__) + + +def _wildcard_fill( + net: str, sta: str, loc: str, chan: str +) -> [str, str, str, str]: + """ + Convert none to wildcards. Cope with seisan channel naming. + + .. 
rubric:: Example + + >>> _wildcard_fill(None, None, None, None) + ('*', '*', '*', '*') + >>> _wildcard_fill("NZ", "FOZ", "10", "HZ") + ('NZ', 'FOZ', '10', 'H?Z') + """ + if net in [None, '']: + net = "*" + if sta in [None, '']: + sta = "*" + if loc in [None, '']: + loc = "*" + if chan in [None, '']: + chan = "*" + # Cope with old seisan chans + if len(chan) == 2: + chan = f"{chan[0]}?{chan[-1]}" + return net, sta, loc, chan + + +def _download_st( + starttime: UTCDateTime, + endtime: UTCDateTime, + buff: float, + min_gap: float, + template_channel_ids: List[tuple], + client, + retries: int +) -> Stream: + """ + Helper to download a stream from a client for a given start and end time. + + Applies `buff` to extend download to (heopfully) ensure all data are + provided. Retries download up to `retries` times, and discards data + with large gaps. + + :param starttime: Start time to download data from + :param endtime: End time to download data to + :param buff: + Length to pad downloaded data by - some clients do not provide all + data requested. + :param min_gap: See core.match_filter.tribe.client_detect + :param template_channel_ids: + :param client: + Client-like object with at least a .get_waveforms_bulk method. + :param retries: See core.match_filter.tribe.client_detect + + :return: Stream as downloaded. + """ + from obspy.clients.fdsn.header import FDSNException + + bulk_info = [] + for chan_id in template_channel_ids: + bulk_info.append(( + chan_id[0], chan_id[1], chan_id[2], chan_id[3], + starttime - buff, endtime + buff)) + + for retry_attempt in range(retries): + try: + Logger.info(f"Downloading data between {starttime} and " + f"{endtime}") + st = client.get_waveforms_bulk(bulk_info) + Logger.info( + "Downloaded data for {0} traces".format(len(st))) + break + except FDSNException as e: + if "Split the request in smaller" in " ".join(e.args): + Logger.warning( + "Datacentre does not support large requests: " + "splitting request into smaller chunks") + st = Stream() + for _bulk in bulk_info: + try: + st += client.get_waveforms_bulk([_bulk]) + except Exception as e: + Logger.error("No data for {0}".format(_bulk)) + Logger.error(e) + continue + Logger.info("Downloaded data for {0} traces".format( + len(st))) + break + except Exception as e: + Logger.error(e) + continue + else: + raise MatchFilterError( + "Could not download data after {0} attempts".format( + retries)) + # Get gaps and remove traces as necessary + if min_gap: + gaps = st.get_gaps(min_gap=min_gap) + if len(gaps) > 0: + Logger.warning("Large gaps in downloaded data") + st.merge() + gappy_channels = list( + set([(gap[0], gap[1], gap[2], gap[3]) + for gap in gaps])) + _st = Stream() + for tr in st: + tr_stats = (tr.stats.network, tr.stats.station, + tr.stats.location, tr.stats.channel) + if tr_stats in gappy_channels: + Logger.warning( + "Removing gappy channel: {0}".format(tr)) + else: + _st += tr + st = _st + st.split() + # Merge traces after gap checking + st = st.merge() + st.trim(starttime=starttime, endtime=endtime) + + st_ids = [tr.id for tr in st] + # Remove traces that do not meet zero criteria + st.traces = [tr for tr in st if _check_daylong(tr.data)] + if len(st) < len(st_ids): + lost_ids = " ".join([tr_id for tr_id in st_ids + if tr_id not in [tr.id for tr in st]]) + Logger.warning( + f"Removed data for {lost_ids} due to more zero datapoints " + f"than non-zero.") + + st_ids = [tr.id for tr in st] + # Remove short traces + st.traces = [ + tr for tr in st + if tr.stats.endtime - tr.stats.starttime > 0.8 * (endtime 
- starttime)] + if len(st) < len(st_ids): + lost_ids = " ".join([tr_id for tr_id in st_ids + if tr_id not in [tr.id for tr in st]]) + Logger.warning( + f"Removed data for {lost_ids} due to less than 80% of the " + f"required length.") + + return st + + +def _pre_process( + st: Stream, + template_ids: set, + pre_processed: bool, + filt_order: int, + highcut: float, + lowcut: float, + samp_rate: float, + process_length: float, + parallel: bool, + cores: int, + daylong: bool, + ignore_length: bool, + ignore_bad_data: bool, + overlap: float, **kwargs +) -> Stream: + """ + Basic matched-filter processing flow. Data are processing in-place. + + :param st: Stream to process + :param template_ids: + Iterable of seed ids in the template set. Only channels matching these + seed ids will be retained. + :param pre_processed: See core.match_filter.tribe.detect + :param filt_order: See utils.pre_processing.multi_process + :param highcut: See utils.pre_processing.multi_process + :param lowcut: See utils.pre_processing.multi_process + :param samp_rate: See utils.pre_processing.multi_process + :param process_length: See utils.pre_processing.multi_process + :param parallel: See utils.pre_processing.multi_process + :param cores: See utils.pre_processing.multi_process + :param daylong: See utils.pre_processing.multi_process + :param ignore_length: See utils.pre_processing.multi_process + :param overlap: See core.match_filter.tribe.detect + :param ignore_bad_data: See utils.pre_processing.multi_process + :param template_ids: + + :return: Processed stream + """ + # Retain only channels that have matches in templates + Logger.info(template_ids) + st = Stream([tr for tr in st if tr.id in template_ids]) + Logger.info(f"Processing {(len(st))} channels") + if len(st) == 0: + raise IndexError( + "No matching channels between stream and templates") + tic = default_timer() + _spike_test(st) + toc = default_timer() + Logger.info(f"Checking for spikes took {toc - tic:.4f} s") + if not pre_processed: + st_chunks = _group_process( + filt_order=filt_order, + highcut=highcut, + lowcut=lowcut, + samp_rate=samp_rate, + process_length=process_length, + parallel=parallel, + cores=cores, + stream=st, + daylong=daylong, + ignore_length=ignore_length, + overlap=overlap, + ignore_bad_data=ignore_bad_data) + else: + st_chunks = [st] + Logger.info(f"Stream has been split into {len(st_chunks)} chunks") + return st_chunks + + +def _group( + sids: Set[str], + templates: List[Template], + group_size: int, + groups: List[List[str]] = None +) -> List[List[Template]]: + """ + Group templates either by seed id, or using pre-computed groups + + :param sids: Seed IDs available in stream + :param templates: Templates to group + :param group_size: Maximum group size + :param groups: [Optional] List of List of template names in groups + :return: Groups of templates. 
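    For example (illustrative names): `groups=[["t1", "t2"], ["t3"]]` would
    return the corresponding Template objects in the same grouping; names not
    found in `templates` are skipped, and groups left empty by that filtering
    are not returned.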
+ """ + Logger.info(f"Grouping for {sids}") + if groups: + Logger.info("Using pre-computed groups") + t_dict = {t.name: t for t in templates} + template_groups = [] + for grp in groups: + template_group = [ + t_dict.get(t_name) for t_name in grp + if t_name in t_dict.keys()] + if len(template_group): + template_groups.append(template_group) + return template_groups + template_groups = group_templates_by_seedid( + templates=templates, + st_seed_ids=sids, + group_size=group_size) + if len(template_groups) == 1 and len(template_groups[0]) == 0: + Logger.error("No matching ids between stream and templates") + raise IndexError("No matching ids between stream and templates") + return template_groups + + +def _corr_and_peaks( + templates: List[Template], + template_names: List[str], + stream: Stream, + xcorr_func: str, + concurrency: str, + cores: int, + i: int, + export_cccsums: bool, + parallel: bool, + peak_cores: int, + threshold: float, + threshold_type: str, + trig_int: float, + sampling_rate: float, + full_peaks: bool, + plot: bool, + plotdir: str, + plot_format: str, + prepped: bool = False, + **kwargs +): + """ + Compute cross-correlation between templates and a stream. Returns peaks in + correlation function. + + :param templates: Templates to correlate + :param template_names: Names of templates (ordered as templates) + :param stream: Stream to correlate templates with + :param xcorr_func: Cross-correlation function to use + :param concurrency: Concurrency of cross-correlation function + :param cores: Cores (threads) to use for cross-correlation + :param i: Group-id (internal book-keeping) + :param export_cccsums: Whether to export the raw cross-correlation sums + :param parallel: Whether to compute peaks in parallel + :param peak_cores: Number of cores (threads) to use for peak finding + :param threshold: Threshold value (user-defined) + :param threshold_type: Threshold type (e.g. MAD, ...) + :param trig_int: Trigger interval in seconds + :param sampling_rate: Sampling rate of data + :param full_peaks: Whether to compute full peaks, or fast peaks. + :param plot: Whether to plot correlation sums and peaks or not + :param plotdir: Where to save plots if made + :param plot_format: What format (extension) to use for plots. + :param prepped: + Whether data have already been prepared for correlation or not. + If prepped, inputs change for a specific xcorr-function, see code. + + :return: Peaks, thresholds, number of channels, channels for each template + """ + # Special cases for fmf and fftw to minimize reshaping time. + Logger.info( + f"Starting correlation run for template group {i}") + tic = default_timer() + if prepped and xcorr_func == "fmf": + assert isinstance(templates, np.ndarray) + assert isinstance(stream, np.ndarray) + # These need to be passed from queues. + pads = kwargs.get('pads') + weights = kwargs.get('weights') + chans = kwargs.get("chans") + no_chans = kwargs.get("no_chans") + # We do not care about removing the gain from our data, we copied it. 
+ multipliers = np.ones((len(stream), 1)) + step = 1 # We only implement single-step correlations + if concurrency in ("multithread", "multiprocess"): + arch = "cpu" + else: + arch = "gpu" + cccsums = _stabalised_fmf( + template_arr=templates, data_arr=stream, weights=weights, + pads=pads, arch=arch, multipliers=multipliers, step=step) + elif prepped and xcorr_func in ("fftw", None): + assert isinstance(templates, dict) + assert isinstance(stream, dict) + pads = kwargs.pop('pads') + seed_ids = kwargs.pop("seed_ids") + num_cores_inner, num_cores_outer = _set_inner_outer_threading( + kwargs.get('cores', None), kwargs.get("cores_outer", None), + len(stream)) + + cccsums, tr_chans = fftw_multi_normxcorr( + template_array=templates, stream_array=stream, + pad_array=pads, seed_ids=seed_ids, cores_inner=num_cores_inner, + cores_outer=num_cores_outer, stack=True, **kwargs) + n_templates = len(cccsums) + # Post processing + no_chans = np.sum(np.array(tr_chans).astype(int), axis=0) + chans = [[] for _i in range(n_templates)] + for seed_id, tr_chan in zip(seed_ids, tr_chans): + for chan, state in zip(chans, tr_chan): + if state: + chan.append(seed_id) + cccsums = _zero_invalid_correlation_sums(cccsums, pads, chans) + chans = [[(seed_id.split('.')[1], seed_id.split('.')[-1].split('_')[0]) + for seed_id in _chans] for _chans in chans] + else: + # The default just uses stream xcorr funcs. + multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency) + cccsums, no_chans, chans = multichannel_normxcorr( + templates=templates, stream=stream, cores=cores, **kwargs + ) + if len(cccsums[0]) == 0: + raise MatchFilterError( + f"Correlation has not run for group {i}, " + f"zero length cccsum") + toc = default_timer() + Logger.info( + f"Correlations for group {i} of {len(template_names)} " + f"templates took {toc - tic:.4f} s") + Logger.debug( + f"The shape of the returned cccsums in group {i} " + f"is: {cccsums.shape}") + Logger.debug( + f'This is from {len(templates)} templates correlated with ' + f'{len(stream)} channels of data in group {i}') + + # Handle saving correlation stats + if export_cccsums: + for i, cccsum in enumerate(cccsums): + fname = ( + f"{template_names[i]}-{stream[0].stats.starttime}-" + f"{stream[0].stats.endtime}_cccsum.npy") + np.save(file=fname, arr=cccsum) + Logger.info( + f"Saved correlation statistic to {fname}") + + # Zero mean check + if np.any(np.abs(cccsums.mean(axis=-1)) > 0.05): + Logger.warning( + 'Mean of correlations is non-zero! Check this!') + if parallel: + Logger.info(f"Finding peaks using {peak_cores} threads") + else: + Logger.info("Finding peaks in serial") + # This is in the main process because transferring + # lots of large correlation sums in queues is very slow + all_peaks, thresholds = _threshold( + cccsums=cccsums, no_chans=no_chans, + template_names=template_names, threshold=threshold, + threshold_type=threshold_type, + trig_int=int(trig_int * sampling_rate), + parallel=parallel, full_peaks=full_peaks, + peak_cores=peak_cores, plot=plot, stream=stream, + plotdir=plotdir, plot_format=plot_format) + return all_peaks, thresholds, no_chans, chans + + +def _threshold( + cccsums: np.ndarray, + no_chans: list, + template_names: list, + threshold: float, + threshold_type: str, + trig_int: int, # converted to samples before getting to this func. + parallel: bool, + full_peaks: bool, + peak_cores: int, + plot: bool, + stream: Stream, + plotdir: str, + plot_format: str, +): + """ + Find peaks within correlation functions for given thresholds. 
+ + :param cccsums: Numpy array of correlations [templates x samples] + :param no_chans: + Number of channels for each correlation (ordered as cccsums) + :param template_names: + Template names for each correlation (ordered as cccsums) + :param threshold: Input threshold value + :param threshold_type: Input threshold type (e.g. MAD, ...) + :param trig_int: Trigger interval in SAMPLES. + :param parallel: Whether to compute peaks in parallel + :param full_peaks: Whether to compute full peaks or not + :param peak_cores: Number of cores (threads) to use for peak finding. + :param plot: Whether to plot the peak finding + :param stream: Stream for plotting (not needed otherwise) + :param plotdir: Directory to write plots to + :param plot_format: Format to save plots in + + :return: (all peaks, used thresholds) + """ + Logger.debug(f"Got cccsums shaped {cccsums.shape}") + Logger.debug(f"From {len(template_names)} templates") + + tic = default_timer() + if str(threshold_type) == str("absolute"): + thresholds = [threshold for _ in range(len(cccsums))] + elif str(threshold_type) == str('MAD'): + median_cores = min([peak_cores, len(cccsums)]) + if cccsums.size < 2e7: # parallelism not worth it + median_cores = 1 + with ThreadPoolExecutor(max_workers=median_cores) as executor: + # Because numpy releases GIL threading can use + # multiple cores + medians = executor.map(_mad, cccsums, + chunksize=len(cccsums) // median_cores) + thresholds = [threshold * median for median in medians] + else: + thresholds = [threshold * no_chans[i] + for i in range(len(cccsums))] + toc = default_timer() + Logger.info(f"Computing thresholds took {toc - tic: .4f} s") + outtic = default_timer() + all_peaks = multi_find_peaks( + arr=cccsums, thresh=thresholds, parallel=parallel, + trig_int=trig_int, full_peaks=full_peaks, cores=peak_cores) + outtoc = default_timer() + Logger.info(f"Finding peaks for group took {outtoc - outtic:.4f}s") + + # Plotting + if plot and stream: + for i, cccsum in enumerate(cccsums): + _match_filter_plot( + stream=stream, cccsum=cccsum, + template_names=template_names, + rawthresh=thresholds[i], plotdir=plotdir, + plot_format=plot_format, i=i) + else: + Logger.error("Plotting enabled but not stream found to plot") + + return all_peaks, thresholds + + +def _detect( + template_names: List[str], + all_peaks: np.ndarray, + starttime: UTCDateTime, + delta: float, + no_chans: List[int], + chans: List[List[str]], + thresholds: List[float] +) -> List[Detection]: + """ + Convert peaks to Detection objects + + :param template_names: Lis of template names + :param all_peaks: Array of peaks orders as template_names + :param starttime: Starttime for peak index relative time + :param delta: Sample interval to convert peaks from samples to time + :param no_chans: Number of channels used (ordered as template_names) + :param chans: Channels used (ordered as template_names) + :param thresholds: Thresholds used (ordered as template_names) + + :return: List of detections. 
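    .. Note::
        Each entry in `all_peaks` is the list of (correlation-sum value,
        sample index) pairs found for the corresponding template, so detect
        times are computed as `starttime + sample_index * delta`.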
+ """ + tic = default_timer() + detections = [] + for i, template_name in enumerate(template_names): + if not all_peaks[i]: + Logger.debug(f"Found 0 peaks for template {template_name}") + continue + Logger.debug(f"Found {len(all_peaks[i])} detections " + f"for template {template_name}") + for peak in all_peaks[i]: + detecttime = starttime + (peak[1] * delta) + if peak[0] > no_chans[i]: + Logger.error(f"Correlation sum {peak[0]} exceeds " + f"bounds ({no_chans[i]}") + detection = Detection( + template_name=template_name, detect_time=detecttime, + no_chans=no_chans[i], detect_val=peak[0], + threshold=thresholds[i], typeofdet='corr', + chans=chans[i], + threshold_type=None, + # Update threshold_type and threshold outside of this func. + threshold_input=None) + detections.append(detection) + toc = default_timer() + Logger.info(f"Forming detections took {toc - tic:.4f} s") + return detections + + +def _load_template(t_file: str) -> Template: + """ Load a pickled template from a file """ + try: + with open(t_file, "rb") as f: + t = pickle.load(f) + except Exception as e: + Logger.warning(f"Could not read template from {t_file} due to {e}") + return None + assert isinstance(t, Template), "Loaded object is not a Template, aborting" + return t + + +def _read_template_db(template_file_dict: dict) -> List[Template]: + """ + Read templates from files on disk. + + :param template_file_dict: Template file names keyed by template name + + :returns: list of templates + """ + with ThreadPoolExecutor() as executor: + templates = executor.map(_load_template, template_file_dict.values()) + templates = [t for t in templates if t] + Logger.info(f"Deserialized {len(templates)} templates") + if len(templates) < len(template_file_dict): + Logger.warning(f"Expected {len(template_file_dict)} templates, " + f"but found {len(templates)}") + return templates + + +def _make_party( + detections: List[Detection], + threshold: float, + threshold_type: str, + templates: List[Template], + chunk_start: UTCDateTime, + chunk_id: int, + save_progress: bool +) -> str: + """ + Construct a Party from Detections. + + :param detections: List of detections + :param threshold: Input threshold + :param threshold_type: Input threshold type + :param templates: Templates used in detections + :param chunk_start: Starttime of party epoch + :param chunk_id: Internal index for party epoch + :param save_progress: Whether to save progress or not + + :return: The filename the party has been pickled to. + """ + chunk_dir = os.path.join( + ".parties", "{chunk_start.year}", "{chunk_start.julday:03d}") + chunk_file_str = os.path.join( + chunk_dir, "chunk_party_{chunk_start_str}_{chunk_id}_{pid}.pkl") + # Process ID included in chunk file to avoid multiple processes writing + # and reading and removing the same files. + + # Get the results out of the end! + Logger.info(f"Made {len(detections)} detections") + + # post - add in threshold, threshold_type to all detections + Logger.info("Adding threshold to detections") + for detection in detections: + detection.threshold_input = threshold + detection.threshold_type = threshold_type + + # Select detections very quickly: detection order does not + # change, make dict of keys: template-names and values: + # list of indices and use indices to select + Logger.info("Making dict of detections") + detection_idx_dict = defaultdict(list) + for n, detection in enumerate(detections): + detection_idx_dict[detection.template_name].append(n) + + # Convert to Families and build party. 
+ Logger.info("Converting to party and making events") + chunk_party = Party() + + # Make a dictionary of templates keyed by name - we could be passed a dict + # of pickled templates + if not isinstance(templates, dict): + templates = {t.name: t for t in templates} + + for t_name, template in templates.items(): + family_detections = [ + detections[idx] + for idx in detection_idx_dict[t_name]] + # Make party sparse - only write out families with detections + if len(family_detections): + if not isinstance(template, Template): + # Try and read this from disk + with open(template, "rb") as f: + template = pickle.load(f) + for d in family_detections: + d._calculate_event(template=template) + family = Family( + template=template, detections=family_detections) + chunk_party += family + + Logger.info("Pickling party") + if not os.path.isdir(chunk_dir.format(chunk_start=chunk_start)): + os.makedirs(chunk_dir.format(chunk_start=chunk_start)) + + chunk_file = chunk_file_str.format( + chunk_start_str=chunk_start.strftime("%Y-%m-%dT%H-%M-%S"), + chunk_start=chunk_start, + chunk_id=chunk_id, pid=os.getpid()) + with open(chunk_file, "wb") as _f: + pickle.dump(chunk_party, _f) + Logger.info("Completed party processing") + + if save_progress: + Logger.info(f"Written chunk to {chunk_file}") + return chunk_file + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/eqcorrscan/core/match_filter/matched_filter.py b/eqcorrscan/core/match_filter/matched_filter.py index 7c6c50dd9..b40d2fa63 100644 --- a/eqcorrscan/core/match_filter/matched_filter.py +++ b/eqcorrscan/core/match_filter/matched_filter.py @@ -12,18 +12,11 @@ (https://www.gnu.org/copyleft/lesser.html) """ import logging -from timeit import default_timer import numpy as np -from obspy import Catalog, UTCDateTime, Stream +from obspy import Stream -from eqcorrscan.core.match_filter.helpers import ( - _spike_test, extract_from_stream) - -from eqcorrscan.utils.correlate import get_stream_xcorr -from eqcorrscan.utils.findpeaks import multi_find_peaks -from eqcorrscan.utils.pre_processing import ( - dayproc, shortproc, _prep_data_for_correlation) +from eqcorrscan.core.match_filter.helpers import extract_from_stream Logger = logging.getLogger(__name__) @@ -65,330 +58,13 @@ def __str__(self): return self.value -def _group_detect(templates, stream, threshold, threshold_type, trig_int, - plot=False, plotdir=None, group_size=None, - pre_processed=False, daylong=False, parallel_process=True, - xcorr_func=None, concurrency=None, cores=None, - ignore_length=False, ignore_bad_data=False, - overlap="calculate", full_peaks=False, process_cores=None, - **kwargs): - """ - Pre-process and compute detections for a group of templates. - - Will process the stream object, so if running in a loop, you will want - to copy the stream before passing it to this function. - - :type templates: list - :param templates: List of :class:`eqcorrscan.core.match_filter.Template` - :type stream: `obspy.core.stream.Stream` - :param stream: Continuous data to detect within using the Template. - :type threshold: float - :param threshold: - Threshold level, if using `threshold_type='MAD'` then this will be - the multiple of the median absolute deviation. - :type threshold_type: str - :param threshold_type: - The type of threshold to be used, can be MAD, absolute or - av_chan_corr. See Note on thresholding below. - :type trig_int: float - :param trig_int: - Minimum gap between detections from one template in seconds. 
- If multiple detections occur within trig_int of one-another, the one - with the highest cross-correlation sum will be selected. - :type plot: bool - :param plot: - Turn plotting on or off. - :type plotdir: str - :param plotdir: - The path to save plots to. If `plotdir=None` (default) then the - figure will be shown on screen. - :type group_size: int - :param group_size: - Maximum number of templates to run at once, use to reduce memory - consumption, if unset will use all templates. - :type pre_processed: bool - :param pre_processed: - Set to True if `stream` has already undergone processing, in this - case eqcorrscan will only check that the sampling rate is correct. - Defaults to False, which will use the - :mod:`eqcorrscan.utils.pre_processing` routines to resample and - filter the continuous data. - :type daylong: bool - :param daylong: - Set to True to use the - :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which - preforms additional checks and is more efficient for day-long data - over other methods. - :type parallel_process: bool - :param parallel_process: - :type xcorr_func: str or callable - :param xcorr_func: - A str of a registered xcorr function or a callable for implementing - a custom xcorr function. For more details see: - :func:`eqcorrscan.utils.correlate.register_array_xcorr` - :type concurrency: str - :param concurrency: - The type of concurrency to apply to the xcorr function. Options are - 'multithread', 'multiprocess', 'concurrent'. For more details see - :func:`eqcorrscan.utils.correlate.get_stream_xcorr` - :type cores: int - :param cores: Number of workers for processing and correlation. - :type ignore_length: bool - :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check - (which will raise an error if too much data are missing) then set - ignore_length=True. This is not recommended! - :type overlap: float - :param overlap: - Either None, "calculate" or a float of number of seconds to - overlap detection streams by. This is to counter the effects of - the delay-and-stack in calculating cross-correlation sums. Setting - overlap = "calculate" will work out the appropriate overlap based - on the maximum lags within templates. - :type full_peaks: bool - :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks_compiled` - :type process_cores: int - :param process_cores: - Number of processes to use for pre-processing (if different to - `cores`). - - :return: - :class:`eqcorrscan.core.match_filter.Party` of families of detections. - """ - from eqcorrscan.core.match_filter.party import Party - from eqcorrscan.core.match_filter.family import Family - - master = templates[0] - peak_cores = kwargs.get('peak_cores', process_cores) - kwargs.update(dict(peak_cores=peak_cores)) - # Check that they are all processed the same. 
- lap = 0.0 - for template in templates: - starts = [t.stats.starttime for t in template.st.sort(['starttime'])] - if starts[-1] - starts[0] > lap: - lap = starts[-1] - starts[0] - if not template.same_processing(master): - raise MatchFilterError('Templates must be processed the same.') - if overlap is None: - overlap = 0.0 - elif not isinstance(overlap, float) and str(overlap) == str("calculate"): - overlap = lap - elif not isinstance(overlap, float): - raise NotImplementedError( - "%s is not a recognised overlap type" % str(overlap)) - if overlap >= master.process_length: - Logger.warning( - f"Overlap of {overlap} s is greater than process " - f"length ({master.process_length} s), ignoring overlap") - overlap = 0 - if not pre_processed: - if process_cores is None: - process_cores = cores - streams = _group_process( - template_group=templates, parallel=parallel_process, - cores=process_cores, stream=stream, daylong=daylong, - ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, - overlap=overlap) - for _st in streams: - Logger.debug(f"Processed stream:\n{_st.__str__(extended=True)}") - else: - Logger.warning('Not performing any processing on the continuous data.') - streams = [stream] - detections = [] - party = Party() - if group_size is not None: - n_groups = int(len(templates) / group_size) - if n_groups * group_size < len(templates): - n_groups += 1 - else: - n_groups = 1 - kwargs.update({'peak_cores': kwargs.get('peak_cores', process_cores)}) - for st_chunk in streams: - chunk_start, chunk_end = (min(tr.stats.starttime for tr in st_chunk), - max(tr.stats.endtime for tr in st_chunk)) - Logger.info( - f'Computing detections between {chunk_start} and {chunk_end}') - st_chunk.trim(starttime=chunk_start, endtime=chunk_end) - for tr in st_chunk: - if len(tr) > len(st_chunk[0]): - tr.data = tr.data[0:len(st_chunk[0])] - for i in range(n_groups): - if group_size is not None: - end_group = (i + 1) * group_size - start_group = i * group_size - if i == n_groups: - end_group = len(templates) - else: - end_group = len(templates) - start_group = 0 - template_group = [t for t in templates[start_group: end_group]] - detections += match_filter( - template_names=[t.name for t in template_group], - template_list=[t.st for t in template_group], st=st_chunk, - xcorr_func=xcorr_func, concurrency=concurrency, - threshold=threshold, threshold_type=threshold_type, - trig_int=trig_int, plot=plot, plotdir=plotdir, cores=cores, - full_peaks=full_peaks, **kwargs) - for template in template_group: - family = Family(template=template, detections=[]) - for detection in detections: - if detection.template_name == template.name: - for pick in detection.event.picks: - pick.time += template.prepick - for origin in detection.event.origins: - origin.time += template.prepick - family.detections.append(detection) - party += family - return party - - -def _group_process(template_group, parallel, cores, stream, daylong, - ignore_length, ignore_bad_data, overlap): - """ - Process data into chunks based on template processing length. - - Templates in template_group must all have the same processing parameters. - - :type template_group: list - :param template_group: List of Templates. - :type parallel: bool - :param parallel: Whether to use parallel processing or not - :type cores: int - :param cores: Number of cores to use, can be False to use all available. - :type stream: :class:`obspy.core.stream.Stream` - :param stream: Stream to process, will be left intact. 
- :type daylong: bool - :param daylong: Whether to enforce day-length files or not. - :type ignore_length: bool - :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check - (which will raise an error if too much data are missing) then set - ignore_length=True. This is not recommended! - :type ignore_bad_data: bool - :param ignore_bad_data: - If False (default), errors will be raised if data are excessively - gappy or are mostly zeros. If True then no error will be raised, but - an empty trace will be returned. - :type overlap: float - :param overlap: Number of seconds to overlap chunks by. - - :return: list of processed streams. - """ - master = template_group[0] - processed_streams = [] - kwargs = { - 'filt_order': master.filt_order, - 'highcut': master.highcut, 'lowcut': master.lowcut, - 'samp_rate': master.samp_rate, 'parallel': parallel, - 'num_cores': cores, 'ignore_length': ignore_length, - 'ignore_bad_data': ignore_bad_data} - # Processing always needs to be run to account for gaps - pre-process will - # check whether filtering and resampling needs to be done. - process_length = master.process_length - if daylong: - if not master.process_length == 86400: - Logger.warning( - 'Processing day-long data, but template was cut from %i s long' - ' data, will reduce correlations' % master.process_length) - func = dayproc - process_length = 86400 - # Check that data all start on the same day, otherwise strange - # things will happen... - starttimes = [tr.stats.starttime.date for tr in stream] - if not len(list(set(starttimes))) == 1: - Logger.warning('Data start on different days, setting to last day') - starttime = UTCDateTime( - stream.sort(['starttime'])[-1].stats.starttime.date) - else: - starttime = stream.sort(['starttime'])[0].stats.starttime - else: - # We want to use shortproc to allow overlaps - func = shortproc - starttime = stream.sort(['starttime'])[0].stats.starttime - endtime = stream.sort(['endtime'])[-1].stats.endtime - data_len_samps = round((endtime - starttime) * master.samp_rate) + 1 - assert overlap < process_length, "Overlap must be less than process length" - chunk_len_samps = (process_length - overlap) * master.samp_rate - n_chunks = int(data_len_samps // chunk_len_samps) - Logger.info(f"Splitting these data in {n_chunks} chunks") - if n_chunks == 0: - Logger.error('Data must be process_length or longer, not computing') - _endtime = starttime - for i in range(n_chunks): - kwargs.update( - {'starttime': starttime + (i * (process_length - overlap))}) - if not daylong: - _endtime = kwargs['starttime'] + process_length - kwargs.update({'endtime': _endtime}) - else: - _endtime = kwargs['starttime'] + 86400 - chunk_stream = stream.slice(starttime=kwargs['starttime'], - endtime=_endtime).copy() - Logger.debug(f"Processing chunk {i} between {kwargs['starttime']} " - f"and {_endtime}") - if len(chunk_stream) == 0: - Logger.warning( - f"No data between {kwargs['starttime']} and {_endtime}") - continue - for tr in chunk_stream: - tr.data = tr.data[0:int( - process_length * tr.stats.sampling_rate)] - _chunk_stream_lengths = { - tr.id: tr.stats.endtime - tr.stats.starttime - for tr in chunk_stream} - for tr_id, chunk_length in _chunk_stream_lengths.items(): - # Remove traces that are too short. 
- if not ignore_length and chunk_length <= .8 * process_length: - tr = chunk_stream.select(id=tr_id)[0] - chunk_stream.remove(tr) - Logger.warning( - "Data chunk on {0} starting {1} and ending {2} is " - "below 80% of the requested length, will not use" - " this.".format( - tr.id, tr.stats.starttime, tr.stats.endtime)) - if len(chunk_stream) > 0: - Logger.debug( - f"Processing chunk:\n{chunk_stream.__str__(extended=True)}") - _processed_stream = func(st=chunk_stream, **kwargs) - # If data have more zeros then pre-processing will return a - # trace of 0 length - _processed_stream.traces = [ - tr for tr in _processed_stream if tr.stats.npts != 0] - if len(_processed_stream) == 0: - Logger.warning( - f"Data quality insufficient between {kwargs['starttime']}" - f" and {_endtime}") - continue - # Pre-procesing does additional checks for zeros - we need to check - # again whether we actually have something useful from this. - processed_chunk_stream_lengths = [ - tr.stats.endtime - tr.stats.starttime - for tr in _processed_stream] - if min(processed_chunk_stream_lengths) >= .8 * process_length: - processed_streams.append(_processed_stream) - else: - Logger.warning( - f"Data quality insufficient between {kwargs['starttime']}" - f" and {_endtime}") - continue - - if _endtime < stream[0].stats.endtime: - Logger.warning( - "Last bit of data between {0} and {1} will go unused " - "because it is shorter than a chunk of {2} s".format( - _endtime, stream[0].stats.endtime, process_length)) - return processed_streams - - +# Note: maintained for backwards compatability. All efforts now in tribes def match_filter(template_names, template_list, st, threshold, threshold_type, trig_int, plot=False, plotdir=None, xcorr_func=None, concurrency=None, cores=None, - plot_format='png', output_cat=False, output_event=True, + plot_format='png', output_cat=False, extract_detections=False, arg_check=True, full_peaks=False, - peak_cores=None, spike_test=True, copy_data=True, - export_cccsums=False, **kwargs): + peak_cores=None, export_cccsums=False, **kwargs): """ Main matched-filter detection function. @@ -577,41 +253,8 @@ def match_filter(template_names, template_list, st, threshold, each template. For example, if a template trace starts 0.1 seconds before the actual arrival of that phase, then the pick time generated by match_filter for that phase will be 0.1 seconds early. - - .. Note:: - xcorr_func can be used as follows: - - .. rubric::xcorr_func argument example - - >>> import obspy - >>> import numpy as np - >>> from eqcorrscan.core.match_filter.matched_filter import ( - ... match_filter) - >>> from eqcorrscan.utils.correlate import time_multi_normxcorr - >>> # define a custom xcorr function - >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs): - ... # Just to keep example short call other xcorr function - ... # in practice you would define your own function here - ... print('calling custom xcorr function') - ... return time_multi_normxcorr(templates, stream, pads) - >>> # generate some toy templates and stream - >>> random = np.random.RandomState(42) - >>> template = obspy.read() - >>> stream = obspy.read() - >>> for num, tr in enumerate(stream): # iter st and embed templates - ... data = tr.data - ... tr.data = random.randn(6000) * 5 - ... tr.data[100: 100 + len(data)] = data - >>> # call match_filter ane ensure the custom function is used - >>> detections = match_filter( - ... template_names=['1'], template_list=[template], st=stream, - ... 
threshold=.5, threshold_type='absolute', trig_int=1, - ... plotvar=False, - ... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS - calling custom xcorr function... """ - from eqcorrscan.core.match_filter.detection import Detection - from eqcorrscan.utils.plotting import _match_filter_plot + from eqcorrscan.core.match_filter import Tribe, Template if "plotvar" in kwargs.keys(): Logger.warning("plotvar is depreciated, use plot instead") @@ -654,115 +297,50 @@ def match_filter(template_names, template_list, st, threshold, if isinstance(tr.data, np.ma.core.MaskedArray): raise MatchFilterError( 'Template contains masked array, split first') - if spike_test: - Logger.info("Checking for spikes in data") - _spike_test(st) - if cores is not None: - parallel = True - else: - parallel = False - if peak_cores is None: - peak_cores = cores - if copy_data: - # Copy the stream here because we will muck about with it - Logger.info("Copying data to keep your input safe") - stream = st.copy() - templates = [t.copy() for t in template_list] - _template_names = template_names.copy() # This can be a shallow copy - else: - stream, templates, _template_names = st, template_list, template_names - - Logger.info("Reshaping templates") - stream, templates, _template_names = _prep_data_for_correlation( - stream=stream, templates=templates, template_names=_template_names) - if len(templates) == 0: - raise IndexError("No matching data") - Logger.info('Starting the correlation run for these data') - for template in templates: - Logger.debug(template.__str__()) - Logger.debug(stream.__str__()) - multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency) - outtic = default_timer() - [cccsums, no_chans, chans] = multichannel_normxcorr( - templates=templates, stream=stream, cores=cores, **kwargs) - if len(cccsums[0]) == 0: - raise MatchFilterError('Correlation has not run, zero length cccsum') - outtoc = default_timer() - Logger.info('Looping over templates and streams took: {0:.4f}s'.format( - outtoc - outtic)) - Logger.debug( - 'The shape of the returned cccsums is: {0}'.format(cccsums.shape)) - Logger.debug( - 'This is from {0} templates correlated with {1} channels of ' - 'data'.format(len(templates), len(stream))) - detections = [] - if output_cat: - det_cat = Catalog() - if str(threshold_type) == str("absolute"): - thresholds = [threshold for _ in range(len(cccsums))] - elif str(threshold_type) == str('MAD'): - thresholds = [threshold * np.median(np.abs(cccsum)) - for cccsum in cccsums] - else: - thresholds = [threshold * no_chans[i] for i in range(len(cccsums))] - if peak_cores is None: - peak_cores = cores - outtic = default_timer() - all_peaks = multi_find_peaks( - arr=cccsums, thresh=thresholds, parallel=parallel, - trig_int=int(trig_int * stream[0].stats.sampling_rate), - full_peaks=full_peaks, cores=peak_cores) - outtoc = default_timer() - Logger.info("Finding peaks took {0:.4f}s".format(outtoc - outtic)) - for i, cccsum in enumerate(cccsums): - if export_cccsums: - fname = (f"{_template_names[i]}-{stream[0].stats.starttime}-" - f"{stream[0].stats.endtime}_cccsum.npy") - np.save(file=fname, arr=cccsum) - Logger.info(f"Saved correlation statistic to {fname}") - if np.abs(np.mean(cccsum)) > 0.05: - Logger.warning('Mean is not zero! 
Check this!') - # Set up a trace object for the cccsum as this is easier to plot and - # maintains timing - if plot: - _match_filter_plot( - stream=stream, cccsum=cccsum, template_names=_template_names, - rawthresh=thresholds[i], plotdir=plotdir, - plot_format=plot_format, i=i) - if all_peaks[i]: - Logger.debug("Found {0} peaks for template {1}".format( - len(all_peaks[i]), _template_names[i])) - for peak in all_peaks[i]: - detecttime = ( - stream[0].stats.starttime + - peak[1] / stream[0].stats.sampling_rate) - detection = Detection( - template_name=_template_names[i], detect_time=detecttime, - no_chans=no_chans[i], detect_val=peak[0], - threshold=thresholds[i], typeofdet='corr', chans=chans[i], - threshold_type=threshold_type, threshold_input=threshold) - if output_cat or output_event: - detection._calculate_event(template_st=templates[i]) - detections.append(detection) - if output_cat: - det_cat.append(detection.event) - else: - Logger.debug("Found 0 peaks for template {0}".format( - _template_names[i])) + + # Make a tribe and run tribe.detect + tribe = Tribe() + # Cope with naming issues + name_mapper = {template_name: f"template_{i}" + for i, template_name in enumerate(template_names)} + for template, template_name in zip(template_list, template_names): + tribe += Template( + st=template, name=name_mapper[template_name], + process_length=(st[0].stats.endtime - st[0].stats.starttime), + prepick=0.0, samp_rate=template[0].stats.sampling_rate, + ) + + # Data must be pre-processed + party = tribe.detect( + stream=st, threshold=threshold, threshold_type=threshold_type, + trig_int=trig_int, plot=plot, plotdir=plotdir, daylong=False, + parallel_process=False, xcorr_func=xcorr_func, concurrency=concurrency, + cores=cores, ignore_length=True, ignore_bad_data=True, group_size=None, + overlap="calculate", full_peaks=full_peaks, save_progress=False, + process_cores=None, pre_processed=True, check_processing=False, + return_stream=False, plot_format=plot_format, + peak_cores=peak_cores, export_cccsums=export_cccsums, **kwargs + ) + detections = [d for f in party for d in f] + + # Remap template names + name_mapper = {val: key for key, val in name_mapper.items()} + for d in detections: + d.template_name = name_mapper[d.template_name] + Logger.info("Made {0} detections from {1} templates".format( - len(detections), len(templates))) + len(detections), len(tribe))) if extract_detections: - detection_streams = extract_from_stream(stream, detections) - del stream, templates + detection_streams = extract_from_stream(st, detections) if output_cat and not extract_detections: - return detections, det_cat + return detections, party.get_catalog() elif not extract_detections: return detections elif extract_detections and not output_cat: return detections, detection_streams else: - return detections, det_cat, detection_streams + return detections, party.get_catalog(), detection_streams if __name__ == "__main__": diff --git a/eqcorrscan/core/match_filter/party.py b/eqcorrscan/core/match_filter/party.py index 3afb8041b..c4e45407d 100644 --- a/eqcorrscan/core/match_filter/party.py +++ b/eqcorrscan/core/match_filter/party.py @@ -15,6 +15,7 @@ import glob import os import shutil +import pickle import tarfile import tempfile import logging @@ -93,12 +94,13 @@ def __iadd__(self, other): raise NotImplementedError( 'Ambiguous add, only allowed Party or Family additions.') for oth_fam in families: - added = False - for fam in self.families: - if fam.template == oth_fam.template: - fam += oth_fam - added = True - if not 
added: + fam = self.select(oth_fam.template.name) + # This check is taken care of by Family.__iadd__ + # assert fam.template == oth_fam.template, ( + # "Matching template names, but different templates") + if fam is not None: + fam += oth_fam + else: self.families.append(oth_fam) return self @@ -294,6 +296,10 @@ def __len__(self): length += len(family) return length + @property + def _template_dict(self): + return {family.template.name: family for family in self} + def select(self, template_name): """ Select a specific family from the party. @@ -302,8 +308,7 @@ def select(self, template_name): :param template_name: Template name of Family to select from a party. :returns: Family """ - return [fam for fam in self.families - if fam.template.name == template_name][0] + return self._template_dict.get(template_name) def sort(self): """ @@ -490,7 +495,7 @@ def rethreshold(self, new_threshold, new_threshold_type='MAD', def decluster(self, trig_int, timing='detect', metric='avg_cor', hypocentral_separation=None, min_chans=0, - absolute_values=False): + absolute_values=False, num_threads=None): """ De-cluster a Party of detections by enforcing a detection separation. @@ -527,6 +532,10 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor', :param absolute_values: Use the absolute value of the metric to choose the preferred detection. + :type num_threads: int + :param num_threads: + Number of threads to use for internal c-funcs if available. + Only valid if hypocentral_separation used. .. Warning:: Works in place on object, if you need to keep the original safe @@ -614,7 +623,8 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor', peaks_out = decluster_distance_time( peaks=detect_vals, index=detect_times, trig_int=trig_int * 10 ** 6, catalog=catalog, - hypocentral_separation=hypocentral_separation) + hypocentral_separation=hypocentral_separation, + num_threads=num_threads) else: peaks_out = decluster( peaks=detect_vals, index=detect_times, @@ -637,6 +647,7 @@ def decluster(self, trig_int, timing='detect', metric='avg_cor', template=template, detections=[d for d in declustered_detections if d.template_name == template_name])) + # TODO: this might be better changing the list of families in place. 
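        # A possible in-place variant (hypothetical sketch, not what this
        # patch implements): keep the existing Family objects, including any
        # left empty, and only swap their detection lists, e.g.
        #     for family in self.families:
        #         family.detections = [
        #             d for d in declustered_detections
        #             if d.template_name == family.template.name]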
self.families = new_families return self @@ -796,6 +807,12 @@ def read(self, filename=None, read_detection_catalog=True, filenames = glob.glob(filename) for _filename in filenames: Logger.info(f"Reading from {_filename}") + # Cope with pickled files + if _filename.endswith('.pkl'): + with open(_filename, "rb") as _f: + chunk_party = pickle.load(_f) + self.__iadd__(chunk_party) + continue with tarfile.open(_filename, "r:*") as arc: temp_dir = tempfile.mkdtemp() arc.extractall(path=temp_dir, members=_safemembers(arc)) @@ -833,8 +850,8 @@ def read(self, filename=None, read_detection_catalog=True, return self def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, - min_cc_from_mean_cc_factor=None, - horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'], + min_cc_from_mean_cc_factor=None, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2'], cores=1, interpolate=False, plot=False, plotdir=None, parallel=True, process_cores=None, ignore_length=False, ignore_bad_data=False, export_cc=False, cc_dir=None, diff --git a/eqcorrscan/core/match_filter/template.py b/eqcorrscan/core/match_filter/template.py index 8b792de90..599d27ed3 100644 --- a/eqcorrscan/core/match_filter/template.py +++ b/eqcorrscan/core/match_filter/template.py @@ -18,13 +18,15 @@ import shutil import logging +from typing import List, Set +from functools import lru_cache + import numpy as np from obspy import Stream from obspy.core.event import Comment, Event, CreationInfo from eqcorrscan.core.match_filter.helpers import _test_event_similarity -from eqcorrscan.core.match_filter.matched_filter import ( - _group_detect, MatchFilterError) +from eqcorrscan.core.match_filter.matched_filter import MatchFilterError from eqcorrscan.core import template_gen Logger = logging.getLogger(__name__) @@ -105,6 +107,15 @@ def __init__(self, name=None, st=None, lowcut=None, highcut=None, author=getpass.getuser()))) self.event = event + @property + def _processing_parameters(self): + """ + Internal function / attribute to return all processing parameters for + quick grouping of templates as tuple. + """ + return (self.lowcut, self.highcut, self.samp_rate, self.filt_order, + self.process_length) + def __repr__(self): """ Print the template. @@ -155,6 +166,17 @@ def __eq__(self, other, verbose=False, shallow_event_check=False): >>> template_a == template_b True + This should also cope with nan channels: + >>> import numpy as np + >>> template_c = template_a.copy() + >>> template_c.st[0].data = np.full( + ... template_c.st[0].stats.npts, np.nan) + >>> template_c == template_a + False + >>> template_d = template_c.copy() + >>> template_d == template_c + True + This will check all parameters of template including the data in the stream. 
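For orientation, a minimal sketch of how the new `_processing_parameters` tuple can be used to group templates, assuming bare Templates built with only their processing attributes set (this mirrors what `quick_group_templates`, added later in this file, does):

    from collections import defaultdict
    from eqcorrscan.core.match_filter.template import Template

    t1 = Template(name='a', lowcut=2.0, highcut=8.0, samp_rate=20.0,
                  filt_order=4, process_length=3600)
    t2 = Template(name='b', lowcut=2.0, highcut=8.0, samp_rate=20.0,
                  filt_order=4, process_length=3600)
    assert t1.same_processing(t2)  # compares the two parameter tuples

    # Grouping by the tuple is effectively what quick_group_templates does
    groups = defaultdict(list)
    for t in (t1, t2):
        groups[t._processing_parameters].append(t.name)
    # groups == {(2.0, 8.0, 20.0, 4, 3600): ['a', 'b']}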
@@ -199,7 +221,8 @@ def __eq__(self, other, verbose=False, shallow_event_check=False): if self_is_stream and other_is_stream: for tr, oth_tr in zip(self.st.sort(), other.st.sort()): - if not np.array_equal(tr.data, oth_tr.data): + if not np.array_equal( + tr.data, oth_tr.data, equal_nan=True): if verbose: print("Template data are not equal on " "{0}".format(tr.id)) @@ -284,12 +307,9 @@ def same_processing(self, other): >>> template_a.same_processing(template_b) False """ - for key in self.__dict__.keys(): - if key in ['name', 'st', 'prepick', 'event', 'template_info']: - continue - if not self.__dict__[key] == other.__dict__[key]: - return False - return True + if self._processing_parameters == other._processing_parameters: + return True + return False def write(self, filename, format='tar'): """ @@ -510,11 +530,12 @@ def detect(self, stream, threshold, threshold_type, trig_int, .. Note:: See tutorials for example. """ + from eqcorrscan.core.match_filter.tribe import Tribe if kwargs.get("plotvar") is not None: Logger.warning("plotvar is depreciated, use plot instead") plot = kwargs.get("plotvar") - party = _group_detect( - templates=[self], stream=stream.copy(), threshold=threshold, + party = Tribe(templates=[self]).detect( + stream=stream, threshold=threshold, threshold_type=threshold_type, trig_int=trig_int, plotdir=plotdir, plot=plot, pre_processed=pre_processed, daylong=daylong, parallel_process=parallel_process, xcorr_func=xcorr_func, @@ -707,6 +728,180 @@ def group_templates(templates): return template_groups +def quick_group_templates(templates): + """ + Group templates into sets of similarly processed templates. + + :type templates: List of Tribe of Templates + :return: List of Lists of Templates. + """ + # Get the template's processing parameters + processing_tuples = [template._processing_parameters + for template in templates] + # Get list of unique parameter-tuples. Sort it so that the order in which + # the groups are processed is consistent across different runs. + uniq_processing_parameters = sorted(list(set(processing_tuples))) + # sort templates into groups + template_groups = [] + for parameter_combination in uniq_processing_parameters: + # find indices of tuples in list with same parameters + template_indices_for_group = [ + j for j, param_tuple in enumerate(processing_tuples) + if param_tuple == parameter_combination] + + new_group = list() + for template_index in template_indices_for_group: + # use indices to sort templates into groups + new_group.append(templates[int(template_index)]) + template_groups.append(new_group) + return template_groups + + +def group_templates_by_seedid( + templates: List[Template], + st_seed_ids: Set[str], + group_size: int, +) -> List[List[Template]]: + """ + Group templates to reduce dissimilar traces + + :param templates: + Templates to group together + :param st_seed_ids: + Seed ids in the stream to be matched with + :param group_size: + Maximum group size - will not exceed this size + + :return: + List of lists of templates grouped. + """ + all_template_sids = {tr.id for template in templates for tr in template.st} + if len(all_template_sids.intersection(st_seed_ids)) == 0: + Logger.warning(f"No matches between stream ({st_seed_ids} and " + f"templates ({all_template_sids}") + # Get overlapping seed ids so that we only group based on the + # channels we have in the data. 
Use a tuple so that it hashes + template_seed_ids = tuple( + (template.name, tuple( + {tr.id for tr in template.st}.intersection(st_seed_ids))) + for template in templates) + # Don't use templates that don't have any overlap with the stream + template_seed_ids = tuple( + (t_name, t_chans) for t_name, t_chans in template_seed_ids + if len(t_chans)) + Logger.info(f"Dropping {len(templates) - len(template_seed_ids)} " + f"templates due to no matched channels") + # We will need this dictionary at the end for getting the templates by id + template_dict = {t.name: t for t in templates} + # group_size can be None, in which case we don't actually need to group + if (group_size is not None) and (group_size < len(template_seed_ids)): + # Pass off to cached function + out_groups = _group_seed_ids( + template_seed_ids=template_seed_ids, group_size=group_size) + + # Convert from groups of template names to groups of templates + out_groups = [[template_dict[t] for t in out_group] + for out_group in out_groups] + else: + Logger.info(f"Group size ({group_size}) larger than n templates" + f" ({len(template_seed_ids)}), no grouping performed") + out_groups = [[template_dict[t[0]] for t in template_seed_ids]] + assert sum(len(grp) for grp in out_groups) == len(template_seed_ids), ( + "Something went wrong internally with grouping - we don't have the " + "right number of templates. Please report this bug") + return out_groups + + +@lru_cache(maxsize=2) +def _group_seed_ids(template_seed_ids, group_size): + """ Cachable version of internals to avoid re-computing for every day. """ + # Convert hashable tuple to dict for ease + template_seed_ids = {tup[0]: set(tup[1]) for tup in template_seed_ids} + # Get initial groups with matched traces - sort by length + sorted_templates = sorted( + template_seed_ids, key=lambda key: len(template_seed_ids[key]), + reverse=True) + # Group into matching sets of seed-ids - ideally all groups would have the + # same seed ids and no nan-traces + groups, group, group_sids, group_sid = ( + [], [sorted_templates[0]], [], template_seed_ids[sorted_templates[0]]) + + for i in range(1, len(sorted_templates)): + # Check that we don't exceed the maximum group size first + if len(group) >= group_size or ( + template_seed_ids[sorted_templates[i]] != group_sid): + groups.append(group) + group = [sorted_templates[i]] + group_sids.append(group_sid) + group_sid = template_seed_ids[sorted_templates[i]] + else: + group.append(sorted_templates[i]) + # Get the final group + groups.append(group) + group_sids.append(group_sid) + Logger.info(f"{len(groups)} initial template groups") + + # Check if all the groups are full + groups_full = sum([len(grp) == group_size for grp in groups]) + if groups_full >= (len(template_seed_ids) // group_size) - 1: + return groups + + # Smush together groups until group-size condition is met. + # Make list of group ids - compare ungrouped sids to intersection of sids + # in group - add most similar, then repeat. Start with fewest sids. 
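    # In short (descriptive note): each initial group holds templates whose
    # matched seed-id sets are identical; the loop below repeatedly merges in
    # the ungrouped group whose seed-id set differs least (smallest symmetric
    # difference) from the current group, accepting a merge only while the
    # combined number of templates stays within group_size. Keeping seed-id
    # sets similar limits the number of all-NaN padded channels that
    # _prep_data_for_correlation would otherwise have to add.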
+ + n_original_groups = len(groups) + grouped = np.zeros(n_original_groups, dtype=bool) + # Use -1 as no group given + group_ids = np.ones(n_original_groups, dtype=int) * -1 + + # Sort groups by length of seed-ids + order = np.argsort([len(sid) for sid in group_sids]) + groups = [groups[i] for i in order] + group_sids = [group_sids[i] for i in order] + + # Assign shortest group to the zeroth group + grouped[0], group_id = 1, 0 + group_ids[0] = group_id + group_sid = group_sids[0] + group_len = len(groups[0]) + # Loop until all groups have been assigned a final group + Logger.info("Running similarity grouping") + while grouped.sum() < n_original_groups: + # Work out difference between groups + diffs = [(i, len(group_sid.symmetric_difference(other_sid))) + for i, other_sid in enumerate(group_sids) if not grouped[i]] + diffs.sort(key=lambda tup: tup[1]) + closest_group_id = diffs[0][0] + + if group_len + len(groups[closest_group_id]) > group_size: + # Max size reached, make new group + group_id += 1 + # Take the next shortest ungrouped group + i = 0 + while grouped[i]: + i += 1 + group_sid = group_sids[i] + grouped[i] = 1 + group_ids[i] = group_id + group_len = len(groups[i]) + else: + # Add in closest + grouped[closest_group_id] = 1 + group_ids[closest_group_id] = group_id + # Update the group seed ids to include the new ones + group_sid = group_sid.union(group_sids[closest_group_id]) + group_len += len(groups[closest_group_id]) + Logger.info("Completed grouping") + + out_groups = [] + for group_id in set(group_ids): + out_group = [t for i in range(n_original_groups) + if group_ids[i] == group_id for t in groups[i]] + out_groups.append(out_group) + return out_groups + + if __name__ == "__main__": import doctest diff --git a/eqcorrscan/core/match_filter/tribe.py b/eqcorrscan/core/match_filter/tribe.py index f03bf77a4..e415d1897 100644 --- a/eqcorrscan/core/match_filter/tribe.py +++ b/eqcorrscan/core/match_filter/tribe.py @@ -15,23 +15,33 @@ import getpass import glob import os +import pickle import shutil import tarfile import tempfile +import traceback +import uuid import logging -import numpy as np +from multiprocessing import Process, Queue, cpu_count +from queue import Empty + from obspy import Catalog, Stream, read, read_events from obspy.core.event import Comment, CreationInfo -from eqcorrscan.core.match_filter.template import Template, group_templates +from eqcorrscan.core import template_gen +from eqcorrscan.core.match_filter.template import ( + Template, quick_group_templates) from eqcorrscan.core.match_filter.party import Party +from eqcorrscan.core.match_filter.family import Family from eqcorrscan.core.match_filter.helpers import ( - _safemembers, _par_read, get_waveform_client) -from eqcorrscan.core.match_filter.matched_filter import ( - _group_detect, MatchFilterError) -from eqcorrscan.core import template_gen -from eqcorrscan.utils.pre_processing import _check_daylong + _safemembers, _par_read, get_waveform_client, + _remove_duplicates, _moveout) +from eqcorrscan.core.match_filter.helpers.tribe import _wildcard_fill + +from eqcorrscan.utils.pre_processing import ( + _quick_copy_stream, _prep_data_for_correlation) + Logger = logging.getLogger(__name__) @@ -43,6 +53,7 @@ class Tribe(object): :type templates: List of Template :param templates: The templates within the Tribe. 
""" + _timeout = 1 def __init__(self, templates=None): self.templates = [] @@ -50,6 +61,11 @@ def __init__(self, templates=None): templates = [templates] if templates: self.templates.extend(templates) + # Managers for Processes and Queues to be killed on errors + self._processes = dict() + self._queues = dict() + # Assign unique ids + self.__unique_ids() def __repr__(self): """ @@ -93,12 +109,13 @@ def __iadd__(self, other): >>> print(tribe) Tribe of 3 templates """ + assert isinstance(other, (Tribe, Template)), \ + "Must be either Template or Tribe" + self.__unique_ids(other) if isinstance(other, Tribe): self.templates += other.templates elif isinstance(other, Template): self.templates.append(other) - else: - raise TypeError('Must be either Template or Tribe') return self def __eq__(self, other): @@ -169,6 +186,8 @@ def __getitem__(self, index): process length: None s >>> tribe[0:2] Tribe of 2 templates + >>> tribe["d"] + [] """ if isinstance(index, slice): return self.__class__(templates=self.templates.__getitem__(index)) @@ -183,6 +202,32 @@ def __getitem__(self, index): Logger.warning('Template: %s not in tribe' % index) return [] + def __unique_ids(self, other=None): + """ Check that template names are unique. """ + template_names = [t.name for t in self.templates] + if other: + assert isinstance(other, (Template, Tribe)), ( + "Can only test against tribes or templates") + if isinstance(other, Template): + template_names.append(other.name) + else: + template_names.extend([t.name for t in other]) + + unique_names = set(template_names) + if len(unique_names) < len(template_names): + non_unique_names = [name for name in unique_names + if template_names.count(name) > 1] + raise NotImplementedError( + "Multiple templates found with the same name. Template names " + "must be unique. Non-unique templates: " + f"{', '.join(non_unique_names)}") + return + + @property + def _stream_dir(self): + """ Location for temporary streams """ + return f".streams_{os.getpid()}" + def sort(self): """ Sort the tribe, sorts by template name. @@ -256,7 +301,9 @@ def copy(self): >>> tribe_a == tribe_b True """ - return copy.deepcopy(self) + # We can't copy processes, so we need to just copy the templates + other = Tribe(copy.deepcopy(self.templates)) + return other def write(self, filename, compress=True, catalog_format="QUAKEML"): """ @@ -281,6 +328,11 @@ def write(self, filename, compress=True, catalog_format="QUAKEML"): >>> tribe = Tribe(templates=[Template(name='c', st=read())]) >>> tribe.write('test_tribe') Tribe of 1 templates + >>> tribe.write( + ... "this_wont_work.bob", + ... catalog_format="BOB") # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + TypeError: BOB is not supported """ from eqcorrscan.core.match_filter import CAT_EXT_MAP @@ -295,8 +347,9 @@ def write(self, filename, compress=True, catalog_format="QUAKEML"): if t.event is not None: # Check that the name in the comment matches the template name for comment in t.event.comments: - if comment.text and comment.text.startswith( - "eqcorrscan_template_"): + if not comment.text: + comment.text = "eqcorrscan_template_{0}".format(t.name) + elif comment.text.startswith("eqcorrscan_template_"): comment.text = "eqcorrscan_template_{0}".format(t.name) tribe_cat.append(t.event) if len(tribe_cat) > 0: @@ -333,6 +386,29 @@ def _par_write(self, dirname): parfile.write('\n') return self + def _temporary_template_db(self, template_dir: str = None) -> dict: + """ + Write a temporary template database of pickled templates to disk. 
+ + :param template_dir: + Directory to write to - if None will make a temporary directory. + """ + # We use template names for filenames - check that these are unique + self.__unique_ids() + # Make sure that the template directory exists, or make a tempdir + if template_dir: + if not os.path.isdir(template_dir): + os.makedirs(template_dir) + else: + template_dir = tempfile.mkdtemp() + template_files = dict() + for template in self.templates: + t_file = os.path.join(template_dir, f"{template.name}.pkl") + with open(t_file, "wb") as f: + pickle.dump(template, f) + template_files.update({template.name: t_file}) + return template_files + def read(self, filename): """ Read a tribe of templates from a tar formatted file. @@ -348,13 +424,26 @@ def read(self, filename): >>> tribe_back = Tribe().read('test_tribe.tgz') >>> tribe_back == tribe True + >>> # This can also read pickled templates + >>> import pickle + >>> with open("test_tribe.pkl", "wb") as f: + ... pickle.dump(tribe, f) + >>> tribe_back = Tribe().read("test_tribe.pkl") + >>> tribe_back == tribe + True """ + if filename.endswith(".pkl"): + with open(filename, "rb") as f: + self.__iadd__(pickle.load(f)) + return self with tarfile.open(filename, "r:*") as arc: temp_dir = tempfile.mkdtemp() arc.extractall(path=temp_dir, members=_safemembers(arc)) tribe_dir = glob.glob(temp_dir + os.sep + '*')[0] self._read_from_folder(dirname=tribe_dir) shutil.rmtree(temp_dir) + # Assign unique ids + self.__unique_ids() return self def _read_from_folder(self, dirname): @@ -384,14 +473,157 @@ def _read_from_folder(self, dirname): if t.split(os.sep)[-1] == template.name + '.ms'] if len(t_file) == 0: Logger.error('No waveform for template: ' + template.name) - templates.remove(template) continue elif len(t_file) > 1: Logger.warning('Multiple waveforms found, using: ' + t_file[0]) template.st = read(t_file[0]) + # Remove templates that do not have streams + templates = [t for t in templates if t.st is not None] self.templates.extend(templates) return + def construct(self, method, lowcut, highcut, samp_rate, filt_order, + length, prepick, swin="all", process_len=86400, + all_horiz=False, delayed=True, plot=False, plotdir=None, + min_snr=None, parallel=False, num_cores=False, + skip_short_chans=False, save_progress=False, **kwargs): + """ + Generate a Tribe of Templates. + + :type method: str + :param method: + Method of Tribe generation. Possible options are: `from_client`, + `from_meta_file`. See below on the additional required arguments + for each method. + :type lowcut: float + :param lowcut: + Low cut (Hz), if set to None will not apply a lowcut + :type highcut: float + :param highcut: + High cut (Hz), if set to None will not apply a highcut. + :type samp_rate: float + :param samp_rate: + New sampling rate in Hz. + :type filt_order: int + :param filt_order: + Filter level (number of corners). + :type length: float + :param length: Length of template waveform in seconds. + :type prepick: float + :param prepick: Pre-pick time in seconds + :type swin: str + :param swin: + P, S, P_all, S_all or all, defaults to all: see note in + :func:`eqcorrscan.core.template_gen.template_gen` + :type process_len: int + :param process_len: Length of data in seconds to download and process. + :type all_horiz: bool + :param all_horiz: + To use both horizontal channels even if there is only a pick on + one of them. Defaults to False. 
+ :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own + pick-time, if set to False, each channel will begin at the same + time. + :type plot: bool + :param plot: Plot templates or not. + :type plotdir: str + :param plotdir: + The path to save plots to. If `plotdir=None` (default) then the + figure will be shown on screen. + :type min_snr: float + :param min_snr: + Minimum signal-to-noise ratio for a channel to be included in the + template, where signal-to-noise ratio is calculated as the ratio + of the maximum amplitude in the template window to the rms + amplitude in the whole window given. + :type parallel: bool + :param parallel: Whether to process data in parallel or not. + :type num_cores: int + :param num_cores: + Number of cores to try and use, if False and parallel=True, + will use either all your cores, or as many traces as in the data + (whichever is smaller). + :type save_progress: bool + :param save_progress: + Whether to save the resulting template set at every data step or + not. Useful for long-running processes. + :type skip_short_chans: bool + :param skip_short_chans: + Whether to ignore channels that have insufficient length data or + not. Useful when the quality of data is not known, e.g. when + downloading old, possibly triggered data from a datacentre + + .. note:: + *Method specific arguments:* + + - `from_client` requires: + :param str client_id: + string passable by obspy to generate Client, or any object + with a `get_waveforms` method, including a Client instance. + :param `obspy.core.event.Catalog` catalog: + Catalog of events to generate template for + :param float data_pad: Pad length for data-downloads in seconds + - `from_meta_file` requires: + :param str meta_file: + Path to obspy-readable event file, or an obspy Catalog + :param `obspy.core.stream.Stream` st: + Stream containing waveform data for template. Note that + this should be the same length of stream as you will use + for the continuous detection, e.g. if you detect in + day-long files, give this a day-long file! + :param bool process: + Whether to process the data or not, defaults to True. + + .. Note:: + Method: `from_sac` is not supported by Tribe.construct and must + use Template.construct. + + .. Note:: Templates will be named according to their start-time. + """ + templates, catalog, process_lengths = template_gen.template_gen( + method=method, lowcut=lowcut, highcut=highcut, length=length, + filt_order=filt_order, samp_rate=samp_rate, prepick=prepick, + return_event=True, save_progress=save_progress, swin=swin, + process_len=process_len, all_horiz=all_horiz, plotdir=plotdir, + delayed=delayed, plot=plot, min_snr=min_snr, parallel=parallel, + num_cores=num_cores, skip_short_chans=skip_short_chans, + **kwargs) + for template, event, process_len in zip(templates, catalog, + process_lengths): + t = Template() + if len(template) == 0: + Logger.error('Empty Template') + continue + t.st = template + t.name = template.sort(['starttime'])[0]. 
\ + stats.starttime.strftime('%Y_%m_%dt%H_%M_%S') + t.lowcut = lowcut + t.highcut = highcut + t.filt_order = filt_order + t.samp_rate = samp_rate + t.process_length = process_len + t.prepick = prepick + event.comments.append(Comment( + text="eqcorrscan_template_" + t.name, + creation_info=CreationInfo(agency='eqcorrscan', + author=getpass.getuser()))) + t.event = event + self.templates.append(t) + return self + + def _template_channel_ids(self, wildcards: bool = False): + template_channel_ids = set() + for template in self.templates: + for tr in template.st: + # Cope with missing info and convert to wildcards + net, sta, loc, chan = tr.id.split('.') + if wildcards: + net, sta, loc, chan = _wildcard_fill( + net, sta, loc, chan) + template_channel_ids.add((net, sta, loc, chan)) + return template_channel_ids + def cluster(self, method, **kwargs): """ Cluster the tribe. @@ -404,10 +636,6 @@ def cluster(self, method, **kwargs): Method of stacking, see :mod:`eqcorrscan.utils.clustering` :return: List of tribes. - - .. rubric:: Example - - """ from eqcorrscan.utils import clustering tribes = [] @@ -426,14 +654,18 @@ def cluster(self, method, **kwargs): def detect(self, stream, threshold, threshold_type, trig_int, plot=False, plotdir=None, daylong=False, parallel_process=True, xcorr_func=None, concurrency=None, cores=None, - ignore_length=False, ignore_bad_data=False, group_size=None, - overlap="calculate", full_peaks=False, save_progress=False, - process_cores=None, pre_processed=False, **kwargs): + concurrent_processing=False, ignore_length=False, + ignore_bad_data=False, group_size=None, overlap="calculate", + full_peaks=False, save_progress=False, process_cores=None, + pre_processed=False, check_processing=True, + **kwargs): """ Detect using a Tribe of templates within a continuous stream. - :type stream: `obspy.core.stream.Stream` - :param stream: Continuous data to detect within using the Template. + :type stream: `Queue` or `obspy.core.stream.Stream` + :param stream: + Queue of streams of continuous data to detect within using the + Templates, or just the continuous data itself. :type threshold: float :param threshold: Threshold level, if using `threshold_type='MAD'` then this will be @@ -472,7 +704,12 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, 'multithread', 'multiprocess', 'concurrent'. For more details see :func:`eqcorrscan.utils.correlate.get_stream_xcorr` :type cores: int - :param cores: Number of workers for procesisng and detection. + :param cores: Number of workers for processing and detection. + :type concurrent_processing: bool + :param concurrent_processing: + Whether to process steps in detection workflow concurrently or not. + See https://github.com/eqcorrscan/EQcorrscan/pull/544 for + benchmarking. :type ignore_length: bool :param ignore_length: If using daylong=True, then dayproc will try check that the data @@ -496,7 +733,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, overlap = "calculate" will work out the appropriate overlap based on the maximum lags within templates. :type full_peaks: bool - :param full_peaks: See `eqcorrscan.utils.findpeak.find_peaks2_short` + :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short` :type save_progress: bool :param save_progress: Whether to save the resulting party at every data step or not. 
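The `concurrent_processing` flag documented above selects between the serial workflow (`_detect_serial`) and the pipelined workflow (`_detect_concurrent`), in which pre-processing, template prepping and detection building run in separate processes. A minimal usage sketch, assuming a template archive `tribe.tgz` and a continuous-data file `day_of_data.ms` exist; the file names and threshold values are placeholders, not part of this patch:

    from obspy import read
    from eqcorrscan.core.match_filter.tribe import Tribe

    tribe = Tribe().read("tribe.tgz")    # placeholder archive name
    st = read("day_of_data.ms")          # placeholder continuous data

    # Default: the serial workflow is used (concurrent_processing=False)
    party = tribe.detect(
        stream=st, threshold=8.0, threshold_type="MAD", trig_int=6.0)

    # Opt in to the pipelined workflow; see the PR linked above for
    # benchmarking of when this is worthwhile
    party = tribe.detect(
        stream=st, threshold=8.0, threshold_type="MAD", trig_int=6.0,
        concurrent_processing=True)
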
@@ -509,6 +746,9 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, :param pre_processed: Whether the stream has been pre-processed or not to match the templates. + :type check_processing: bool + :param check_processing: + Whether to check that all templates were processed the same. :return: :class:`eqcorrscan.core.match_filter.Party` of Families of @@ -583,45 +823,414 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, where :math:`template` is a single template from the input and the length is the number of channels within this template. """ - party = Party() - template_groups = group_templates(self.templates) - if len(template_groups) > 1 and pre_processed: + # Check that template names are unique + self.__unique_ids() + # We should not need to copy the stream, it is copied in chunks by + # _group_process + + # Argument handling + if overlap is None: + overlap = 0.0 + elif not isinstance(overlap, float) and str(overlap) == "calculate": + overlap = max( + _moveout(template.st) for template in self.templates) + elif not isinstance(overlap, float): raise NotImplementedError( - "Inconsistent template processing and pre-processed data - " - "something is wrong!") - # now we can compute the detections for each group - for group in template_groups: - group_party = _group_detect( - templates=group, stream=stream.copy(), threshold=threshold, - threshold_type=threshold_type, trig_int=trig_int, - plot=plot, group_size=group_size, pre_processed=pre_processed, - daylong=daylong, parallel_process=parallel_process, - xcorr_func=xcorr_func, concurrency=concurrency, cores=cores, - ignore_length=ignore_length, overlap=overlap, plotdir=plotdir, - full_peaks=full_peaks, process_cores=process_cores, - ignore_bad_data=ignore_bad_data, arg_check=False, **kwargs) - party += group_party - if save_progress: - party.write("eqcorrscan_temporary_party") + "%s is not a recognised overlap type" % str(overlap)) + assert overlap < self.templates[0].process_length, ( + f"Overlap {overlap} must be less than process length " + f"{self.templates[0].process_length}") + + # Copy because we need to muck around with them. + inner_kwargs = copy.copy(kwargs) + + plot_format = inner_kwargs.pop("plot_format", "png") + export_cccsums = inner_kwargs.pop('export_cccsums', False) + peak_cores = inner_kwargs.pop('peak_cores', + process_cores) or cpu_count() + groups = inner_kwargs.pop("groups", None) + + if peak_cores == 1: + parallel = False + else: + parallel = True + + if check_processing: + assert len(quick_group_templates(self.templates)) == 1, ( + "Inconsistent template processing parameters found, this is no" + " longer supported. 
\nSplit your tribe using " + "eqcorrscan.core.match_filter.template.quick_group_templates " + "and re-run for each grouped tribe") + sampling_rate = self.templates[0].samp_rate + # Used for sanity checking seed id overlap + template_ids = set( + tr.id for template in self.templates for tr in template.st) + + args = (stream, template_ids, pre_processed, parallel_process, + process_cores, daylong, ignore_length, overlap, + ignore_bad_data, group_size, groups, sampling_rate, threshold, + threshold_type, save_progress, xcorr_func, concurrency, cores, + export_cccsums, parallel, peak_cores, trig_int, full_peaks, + plot, plotdir, plot_format,) + + if concurrent_processing: + party = self._detect_concurrent(*args, **inner_kwargs) + else: + party = self._detect_serial(*args, **inner_kwargs) + + Logger.info("Ensuring all templates are in party") + additional_families = [] + for template in self.templates: + if template.name in party._template_dict.keys(): + continue + additional_families.append( + Family(template=template, detections=[])) + party.families.extend(additional_families) + + # Post-process if len(party) > 0: - for family in party: - if family is not None: - family.detections = family._uniq().detections + Logger.info("Removing duplicates") + party = _remove_duplicates(party) + return party + + def _detect_serial( + self, stream, template_ids, pre_processed, parallel_process, + process_cores, daylong, ignore_length, overlap, ignore_bad_data, + group_size, groups, sampling_rate, threshold, threshold_type, + save_progress, xcorr_func, concurrency, cores, export_cccsums, + parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format, + **kwargs + ): + """ Internal serial detect workflow. """ + from eqcorrscan.core.match_filter.helpers.tribe import ( + _pre_process, _group, _corr_and_peaks, _detect, _make_party) + party = Party() + + assert isinstance(stream, Stream), ( + f"Serial detection requires stream to be a stream, not" + f" a {type(stream)}") + + # We need to copy data here to keep the users input safe. + st_chunks = _pre_process( + st=stream.copy(), template_ids=template_ids, + pre_processed=pre_processed, + filt_order=self.templates[0].filt_order, + highcut=self.templates[0].highcut, + lowcut=self.templates[0].lowcut, + samp_rate=self.templates[0].samp_rate, + process_length=self.templates[0].process_length, + parallel=parallel_process, cores=process_cores, daylong=daylong, + ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, + overlap=overlap, **kwargs) + + chunk_files = [] + for st_chunk in st_chunks: + starttime = st_chunk[0].stats.starttime + delta = st_chunk[0].stats.delta + template_groups = _group( + sids={tr.id for tr in st_chunk}, + templates=self.templates, group_size=group_size, groups=groups) + for i, template_group in enumerate(template_groups): + templates = [_quick_copy_stream(t.st) for t in template_group] + template_names = [t.name for t in template_group] + Logger.info( + f"Prepping {len(templates)} templates for correlation") + + # We need to copy the stream here. 
+ _st, templates, template_names = _prep_data_for_correlation( + stream=_quick_copy_stream(st_chunk), templates=templates, + template_names=template_names) + + all_peaks, thresholds, no_chans, chans = _corr_and_peaks( + templates=templates, template_names=template_names, + stream=_st, xcorr_func=xcorr_func, concurrency=concurrency, + cores=cores, i=i, export_cccsums=export_cccsums, + parallel=parallel, peak_cores=peak_cores, + threshold=threshold, threshold_type=threshold_type, + trig_int=trig_int, sampling_rate=sampling_rate, + full_peaks=full_peaks, plot=plot, plotdir=plotdir, + plot_format=plot_format, prepped=False, **kwargs) + + detections = _detect( + template_names=template_names, + all_peaks=all_peaks, starttime=starttime, + delta=delta, no_chans=no_chans, + chans=chans, thresholds=thresholds) + + chunk_file = _make_party( + detections=detections, threshold=threshold, + threshold_type=threshold_type, + templates=self.templates, chunk_start=starttime, + chunk_id=i, save_progress=save_progress) + chunk_files.append(chunk_file) + # Rebuild + for _chunk_file in chunk_files: + Logger.info(f"Adding party from {_chunk_file} to party") + with open(_chunk_file, "rb") as _f: + party += pickle.load(_f) + if not save_progress: + try: + os.remove(_chunk_file) + except FileNotFoundError: + pass + Logger.info(f"Added party from {_chunk_file}, party now " + f"contains {len(party)} detections") + + if os.path.isdir(self._stream_dir): + shutil.rmtree(self._stream_dir) + return party + + def _detect_concurrent( + self, stream, template_ids, pre_processed, parallel_process, + process_cores, daylong, ignore_length, overlap, ignore_bad_data, + group_size, groups, sampling_rate, threshold, threshold_type, + save_progress, xcorr_func, concurrency, cores, export_cccsums, + parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format, + **kwargs + ): + """ Internal concurrent detect workflow. 
""" + from eqcorrscan.core.match_filter.helpers.processes import ( + _pre_processor, _prepper, _make_detections, _check_for_poison, + _get_and_check, Poison) + from eqcorrscan.core.match_filter.helpers.tribe import _corr_and_peaks + + if isinstance(stream, Stream): + Logger.info("Copying stream to keep your original data safe") + st_queue = Queue(maxsize=2) + st_queue.put(stream.copy()) + # Close off queue + st_queue.put(None) + stream = st_queue + else: + # Note that if a queue has been passed we do not try to keep + # data safe + Logger.warning("Streams in queue will be edited in-place, you " + "should not re-use them") + + # To reduce load copying templates between processes we dump them to + # disk and pass the dictionary of files + template_dir = f".template_db_{uuid.uuid4()}" + template_db = self._temporary_template_db(template_dir) + + # Set up processes and queues + poison_queue = kwargs.get('poison_queue', Queue()) + + if not pre_processed: + processed_stream_queue = Queue(maxsize=1) + else: + processed_stream_queue = stream + + # Prepped queue contains templates and stream (and extras) + prepped_queue = Queue(maxsize=1) + # Output queues + peaks_queue = Queue() + party_file_queue = Queue() + + # Set up processes + if not pre_processed: + pre_processor_process = Process( + target=_pre_processor, + kwargs=dict( + input_stream_queue=stream, + temp_stream_dir=self._stream_dir, + template_ids=template_ids, + pre_processed=pre_processed, + filt_order=self.templates[0].filt_order, + highcut=self.templates[0].highcut, + lowcut=self.templates[0].lowcut, + samp_rate=self.templates[0].samp_rate, + process_length=self.templates[0].process_length, + parallel=parallel_process, + cores=process_cores, + daylong=daylong, + ignore_length=ignore_length, + overlap=overlap, + ignore_bad_data=ignore_bad_data, + output_filename_queue=processed_stream_queue, + poison_queue=poison_queue, + ), + name="ProcessProcess" + ) + + prepper_process = Process( + target=_prepper, + kwargs=dict( + input_stream_filename_queue=processed_stream_queue, + group_size=group_size, + groups=groups, + templates=template_db, + output_queue=prepped_queue, + poison_queue=poison_queue, + xcorr_func=xcorr_func, + ), + name="PrepProcess" + ) + detector_process = Process( + target=_make_detections, + kwargs=dict( + input_queue=peaks_queue, + delta=1 / sampling_rate, + templates=template_db, + threshold=threshold, + threshold_type=threshold_type, + save_progress=save_progress, + output_queue=party_file_queue, + poison_queue=poison_queue, + ), + name="DetectProcess" + ) + + # Cope with old tribes + if not hasattr(self, '_processes'): + self._processes = dict() + if not hasattr(self, '_queues'): + self._queues = dict() + + # Put these processes into the namespace + self._processes.update({ + "prepper": prepper_process, + "detector": detector_process, + }) + self._queues.update({ + "poison": poison_queue, + "stream": stream, + "prepped": prepped_queue, + "peaks": peaks_queue, + "party_file": party_file_queue, + }) + + if not pre_processed: + Logger.info("Starting preprocessor") + self._queues.update({"processed_stream": processed_stream_queue}) + self._processes.update({"pre-processor": pre_processor_process}) + pre_processor_process.start() + + # Start your engines! 
+ prepper_process.start() + detector_process.start() + + # Loop over input streams and template groups + while True: + killed = _check_for_poison(poison_queue) + if killed: + Logger.info("Killed in main loop") + break + try: + to_corr = _get_and_check(prepped_queue, poison_queue) + if to_corr is None: + Logger.info("Ran out of streams, exiting correlation") + break + elif isinstance(to_corr, Poison): + Logger.info("Killed in main loop") + break + starttime, i, stream, template_names, templates, \ + *extras = to_corr + inner_kwargs = copy.copy(kwargs) # We will mangle them + # Correlation specific handling to reduce single-threaded time + if xcorr_func == "fmf": + weights, pads, chans, no_chans = extras + inner_kwargs.update({ + 'weights': weights, 'pads': pads, "no_chans": no_chans, + "chans": chans, "prepped": True}) + elif xcorr_func in (None, 'fftw'): + pads, seed_ids = extras + inner_kwargs.update({ + "pads": pads, "seed_ids": seed_ids, "prepped": True}) + + Logger.info(f"Got stream of {len(stream)} channels") + Logger.info(f"Starting correlation from {starttime}") + + all_peaks, thresholds, no_chans, chans = _corr_and_peaks( + templates=templates, template_names=template_names, + stream=stream, xcorr_func=xcorr_func, + concurrency=concurrency, + cores=cores, i=i, export_cccsums=export_cccsums, + parallel=parallel, peak_cores=peak_cores, + threshold=threshold, threshold_type=threshold_type, + trig_int=trig_int, sampling_rate=sampling_rate, + full_peaks=full_peaks, plot=plot, plotdir=plotdir, + plot_format=plot_format, **inner_kwargs + ) + peaks_queue.put( + (starttime, all_peaks, thresholds, no_chans, chans, + template_names)) + except Exception as e: + Logger.error( + f"Caught exception in correlator:\n {e}") + traceback.print_tb(e.__traceback__) + poison_queue.put(e) + break # We need to break in Main + i += 1 + Logger.debug("Putting None into peaks queue.") + peaks_queue.put(None) + + # Get the party back + Logger.info("Collecting party") + party = Party() + while True: + killed = _check_for_poison(poison_queue) + if killed: + Logger.error("Killed") + break + pf = _get_and_check(party_file_queue, poison_queue) + if pf is None: + # Fin - Queue has been finished with. 
+ break + if isinstance(pf, Poison): + Logger.error("Killed while checking for party") + killed = True + break + with open(pf, "rb") as f: + party += pickle.load(f) + if not save_progress: + try: + os.remove(pf) + except FileNotFoundError: + pass + + # Check for exceptions + if killed: + internal_error = poison_queue.get() + if isinstance(internal_error, Poison): + internal_error = internal_error.value + Logger.error(f"Raising error {internal_error} in main process") + # Now we can raise the error + if internal_error: + # Clean the template db + if os.path.isdir(template_dir): + shutil.rmtree(template_dir) + if os.path.isdir(self._stream_dir): + shutil.rmtree(self._stream_dir) + self._on_error(internal_error) + + # Shut down the processes and close the queues + shutdown = kwargs.get("shutdown", True) + # Allow client_detect to take control + if shutdown: + Logger.info("Shutting down") + self._close_queues() + self._close_processes() + if os.path.isdir(template_dir): + shutil.rmtree(template_dir) + if os.path.isdir(self._stream_dir): + shutil.rmtree(self._stream_dir) return party def client_detect(self, client, starttime, endtime, threshold, threshold_type, trig_int, plot=False, plotdir=None, min_gap=None, daylong=False, parallel_process=True, xcorr_func=None, concurrency=None, cores=None, - ignore_length=False, ignore_bad_data=False, - group_size=None, return_stream=False, full_peaks=False, + concurrent_processing=False, ignore_length=False, + ignore_bad_data=False, group_size=None, + return_stream=False, full_peaks=False, save_progress=False, process_cores=None, retries=3, - **kwargs): + check_processing=True, **kwargs): """ Detect using a Tribe of templates within a continuous stream. :type client: `obspy.clients.*.Client` - :param client: Any obspy client with a dataselect service. + :param client: + Any obspy client (or client-like object) with a dataselect service. :type starttime: :class:`obspy.core.UTCDateTime` :param starttime: Start-time for detections. :type endtime: :class:`obspy.core.UTCDateTime` @@ -669,6 +1278,11 @@ def client_detect(self, client, starttime, endtime, threshold, :func:`eqcorrscan.utils.correlate.get_stream_xcorr` :type cores: int :param cores: Number of workers for processing and detection. + :type concurrent_processing: bool + :param concurrent_processing: + Whether to process steps in detection workflow concurrently or not. + See https://github.com/eqcorrscan/EQcorrscan/pull/544 for + benchmarking. :type ignore_length: bool :param ignore_length: If using daylong=True, then dayproc will try check that the data @@ -756,7 +1370,9 @@ def client_detect(self, client, starttime, endtime, threshold, where :math:`template` is a single template from the input and the length is the number of channels within this template. """ - from obspy.clients.fdsn.client import FDSNException + from eqcorrscan.core.match_filter.helpers.tribe import _download_st + from eqcorrscan.core.match_filter.helpers.processes import ( + _get_detection_stream) # This uses get_waveforms_bulk to get data - not all client types have # this, so we check and monkey patch here. @@ -767,290 +1383,192 @@ def client_detect(self, client, starttime, endtime, threshold, "method, monkey-patching this") client = get_waveform_client(client) - party = Party() + if check_processing: + assert len(quick_group_templates(self.templates)) == 1, ( + "Inconsistent template processing parameters found, this is no" + " longer supported. 
Split your tribe using " + "eqcorrscan.core.match_filter.template.quick_group_templates " + "and re-run for each group") + + groups = kwargs.get("groups", None) + + # Hard-coded buffer for downloading data, often data downloaded is + # not the correct length buff = 300 - # Apply a buffer, often data downloaded is not the correct length data_length = max([t.process_length for t in self.templates]) - pad = 0 - for template in self.templates: - max_delay = (template.st.sort(['starttime'])[-1].stats.starttime - - template.st.sort(['starttime'])[0].stats.starttime) - if max_delay > pad: - pad = max_delay - download_groups = int(endtime - starttime) / data_length - template_channel_ids = [] - for template in self.templates: - for tr in template.st: - if tr.stats.network not in [None, '']: - chan_id = (tr.stats.network,) - else: - chan_id = ('*',) - if tr.stats.station not in [None, '']: - chan_id += (tr.stats.station,) - else: - chan_id += ('*',) - if tr.stats.location not in [None, '']: - chan_id += (tr.stats.location,) - else: - chan_id += ('*',) - if tr.stats.channel not in [None, '']: - if len(tr.stats.channel) == 2: - chan_id += (tr.stats.channel[0] + '?' + - tr.stats.channel[-1],) - else: - chan_id += (tr.stats.channel,) - else: - chan_id += ('*',) - template_channel_ids.append(chan_id) - template_channel_ids = list(set(template_channel_ids)) + + # Calculate overlap + overlap = max(_moveout(template.st) for template in self.templates) + assert overlap < data_length, ( + f"Overlap of {overlap} s is larger than the length of data to " + f"be downloaded: {data_length} s - this won't work.") + + # Work out start and end times of chunks to download + chunk_start, time_chunks = starttime, [] + while chunk_start < endtime: + time_chunks.append((chunk_start, chunk_start + data_length + 20)) + chunk_start += data_length - overlap + + full_stream_dir = None if return_stream: - stream = Stream() - if int(download_groups) < download_groups: - download_groups = int(download_groups) + 1 - else: - download_groups = int(download_groups) - for i in range(download_groups): - bulk_info = [] - for chan_id in template_channel_ids: - bulk_info.append(( - chan_id[0], chan_id[1], chan_id[2], chan_id[3], - starttime + (i * data_length) - (pad + buff), - starttime + ((i + 1) * data_length) + (pad + buff))) - for retry_attempt in range(retries): - try: - Logger.info("Downloading data") - st = client.get_waveforms_bulk(bulk_info) - Logger.info( - "Downloaded data for {0} traces".format(len(st))) - break - except FDSNException as e: - if "Split the request in smaller" in " ".join(e.args): - Logger.warning( - "Datacentre does not support large requests: " - "splitting request into smaller chunks") - st = Stream() - for _bulk in bulk_info: - try: - st += client.get_waveforms_bulk([_bulk]) - except Exception as e: - Logger.error("No data for {0}".format(_bulk)) - Logger.error(e) - continue - Logger.info("Downloaded data for {0} traces".format( - len(st))) - break - except Exception as e: - Logger.error(e) - continue - else: - raise MatchFilterError( - "Could not download data after {0} attempts".format( - retries)) - # Get gaps and remove traces as necessary - if min_gap: - gaps = st.get_gaps(min_gap=min_gap) - if len(gaps) > 0: - Logger.warning("Large gaps in downloaded data") - st.merge() - gappy_channels = list( - set([(gap[0], gap[1], gap[2], gap[3]) - for gap in gaps])) - _st = Stream() - for tr in st: - tr_stats = (tr.stats.network, tr.stats.station, - tr.stats.location, tr.stats.channel) - if tr_stats in 
gappy_channels: - Logger.warning( - "Removing gappy channel: {0}".format(tr)) - else: - _st += tr - st = _st - st.split() - st.detrend("simple").merge() - st.trim(starttime=starttime + (i * data_length) - pad, - endtime=starttime + ((i + 1) * data_length) + pad) - for tr in st: - if not _check_daylong(tr): - st.remove(tr) - Logger.warning( - "{0} contains more zeros than non-zero, " - "removed".format(tr.id)) - for tr in st: - if tr.stats.endtime - tr.stats.starttime < \ - 0.8 * data_length: - st.remove(tr) - Logger.warning( - "{0} is less than 80% of the required length" - ", removed".format(tr.id)) + full_stream_dir = tempfile.mkdtemp() + + poison_queue = Queue() + + detector_kwargs = dict( + threshold=threshold, threshold_type=threshold_type, + trig_int=trig_int, plot=plot, plotdir=plotdir, + daylong=daylong, parallel_process=parallel_process, + xcorr_func=xcorr_func, concurrency=concurrency, cores=cores, + ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, + group_size=group_size, overlap=None, full_peaks=full_peaks, + process_cores=process_cores, save_progress=save_progress, + return_stream=return_stream, check_processing=False, + poison_queue=poison_queue, shutdown=False, + concurrent_processing=concurrent_processing, groups=groups) + + if not concurrent_processing: + Logger.warning("Using concurrent_processing=True can be faster if" + "downloading your data takes a long time. See " + "https://github.com/eqcorrscan/EQcorrscan/pull/544" + "for benchmarks.") + party = Party() if return_stream: - stream += st - try: - party += self.detect( - stream=st, threshold=threshold, - threshold_type=threshold_type, trig_int=trig_int, - plot=plot, plotdir=plotdir, daylong=daylong, - parallel_process=parallel_process, xcorr_func=xcorr_func, - concurrency=concurrency, cores=cores, - ignore_length=ignore_length, - ignore_bad_data=ignore_bad_data, group_size=group_size, - overlap=None, full_peaks=full_peaks, - process_cores=process_cores, **kwargs) - if save_progress: - party.write("eqcorrscan_temporary_party") - except Exception as e: - Logger.critical( - 'Error, routine incomplete, returning incomplete Party') - Logger.error('Error: {0}'.format(e)) + full_st = Stream() + for _starttime, _endtime in time_chunks: + st = _download_st( + starttime=_starttime, endtime=_endtime, + buff=buff, min_gap=min_gap, + template_channel_ids=self._template_channel_ids(), + client=client, retries=retries) + if len(st) == 0: + Logger.warning(f"No suitable data between {_starttime} " + f"and {_endtime}, skipping") + continue + party += self.detect(stream=st, pre_processed=False, + **detector_kwargs) if return_stream: - return party, stream - else: - return party - for family in party: - if family is not None: - family.detections = family._uniq().detections - if return_stream: - return party, stream - else: + full_st += st + if return_stream: + return party, full_st return party - def construct(self, method, lowcut, highcut, samp_rate, filt_order, - length, prepick, swin="all", process_len=86400, - all_horiz=False, delayed=True, plot=False, plotdir=None, - min_snr=None, parallel=False, num_cores=False, - skip_short_chans=False, save_progress=False, **kwargs): - """ - Generate a Tribe of Templates. - - :type method: str - :param method: - Method of Tribe generation. Possible options are: `from_client`, - `from_meta_file`. See below on the additional required arguments - for each method. 
- :type lowcut: float - :param lowcut: - Low cut (Hz), if set to None will not apply a lowcut - :type highcut: float - :param highcut: - High cut (Hz), if set to None will not apply a highcut. - :type samp_rate: float - :param samp_rate: - New sampling rate in Hz. - :type filt_order: int - :param filt_order: - Filter level (number of corners). - :type length: float - :param length: Length of template waveform in seconds. - :type prepick: float - :param prepick: Pre-pick time in seconds - :type swin: str - :param swin: - P, S, P_all, S_all or all, defaults to all: see note in - :func:`eqcorrscan.core.template_gen.template_gen` - :type process_len: int - :param process_len: Length of data in seconds to download and process. - :type all_horiz: bool - :param all_horiz: - To use both horizontal channels even if there is only a pick on - one of them. Defaults to False. - :type delayed: bool - :param delayed: If True, each channel will begin relative to it's own - pick-time, if set to False, each channel will begin at the same - time. - :type plot: bool - :param plot: Plot templates or not. - :type plotdir: str - :param plotdir: - The path to save plots to. If `plotdir=None` (default) then the - figure will be shown on screen. - :type min_snr: float - :param min_snr: - Minimum signal-to-noise ratio for a channel to be included in the - template, where signal-to-noise ratio is calculated as the ratio - of the maximum amplitude in the template window to the rms - amplitude in the whole window given. - :type parallel: bool - :param parallel: Whether to process data in parallel or not. - :type num_cores: int - :param num_cores: - Number of cores to try and use, if False and parallel=True, - will use either all your cores, or as many traces as in the data - (whichever is smaller). - :type save_progress: bool - :param save_progress: - Whether to save the resulting template set at every data step or - not. Useful for long-running processes. - :type skip_short_chans: bool - :param skip_short_chans: - Whether to ignore channels that have insufficient length data or - not. Useful when the quality of data is not known, e.g. when - downloading old, possibly triggered data from a datacentre - :type save_progress: bool - :param save_progress: - Whether to save the resulting party at every data step or not. - Useful for long-running processes. - - .. 
note:: - *Method specific arguments:* + # Get data in advance + time_queue = Queue() + stream_queue = Queue(maxsize=1) + + downloader = Process( + target=_get_detection_stream, + kwargs=dict( + input_time_queue=time_queue, + client=client, + retries=retries, + min_gap=min_gap, + buff=buff, + output_filename_queue=stream_queue, + poison_queue=poison_queue, + temp_stream_dir=self._stream_dir, + full_stream_dir=full_stream_dir, + pre_process=True, parallel_process=parallel_process, + process_cores=process_cores, daylong=daylong, + overlap=0.0, ignore_length=ignore_length, + ignore_bad_data=ignore_bad_data, + filt_order=self.templates[0].filt_order, + highcut=self.templates[0].highcut, + lowcut=self.templates[0].lowcut, + samp_rate=self.templates[0].samp_rate, + process_length=self.templates[0].process_length, + template_channel_ids=self._template_channel_ids(), + ), + name="DownloadProcess" + ) + + # Cope with old tribes + if not hasattr(self, '_processes'): + self._processes = dict() + if not hasattr(self, '_queues'): + self._queues = dict() + # Put processes and queues into shared state + self._processes.update({ + "downloader": downloader, + }) + self._queues.update({ + "times": time_queue, + "poison": poison_queue, + "stream": stream_queue, + }) + + # Fill time queue + for time_chunk in time_chunks: + time_queue.put(time_chunk) + # Close off queue + time_queue.put(None) + + # Start up processes + downloader.start() + + party = self.detect( + stream=stream_queue, pre_processed=True, **detector_kwargs) + + # Close and join processes + self._close_processes() + self._close_queues() - - `from_client` requires: - :param str client_id: - string passable by obspy to generate Client, or any object - with a `get_waveforms` method, including a Client instance. - :param `obspy.core.event.Catalog` catalog: - Catalog of events to generate template for - :param float data_pad: Pad length for data-downloads in seconds - - `from_meta_file` requires: - :param str meta_file: - Path to obspy-readable event file, or an obspy Catalog - :param `obspy.core.stream.Stream` st: - Stream containing waveform data for template. Note that - this should be the same length of stream as you will use - for the continuous detection, e.g. if you detect in - day-long files, give this a day-long file! - :param bool process: - Whether to process the data or not, defaults to True. + if return_stream: + full_st = read(os.path.join(full_stream_dir, "*")) + shutil.rmtree(full_stream_dir) + return party, full_st + return party - .. Note:: - Method: `from_sac` is not supported by Tribe.construct and must - use Template.construct. + def _close_processes( + self, + terminate: bool = False, + processes: dict = None + ): + processes = processes or self._processes + for p_name, p in processes.items(): + if terminate: + Logger.warning(f"Terminating {p_name}") + p.terminate() + continue + try: + Logger.info(f"Joining {p_name}") + p.join(timeout=self._timeout) + except Exception as e: + Logger.error(f"Failed to join due to {e}: terminating") + p.terminate() + Logger.info(f"Closing {p_name}") + try: + p.close() + except Exception as e: + Logger.error( + f"Failed to close {p_name} due to {e}, terminating") + p.terminate() + Logger.info("Finished closing processes") + return - .. Note:: Templates will be named according to their start-time. 
- """ - templates, catalog, process_lengths = template_gen.template_gen( - method=method, lowcut=lowcut, highcut=highcut, length=length, - filt_order=filt_order, samp_rate=samp_rate, prepick=prepick, - return_event=True, save_progress=save_progress, swin=swin, - process_len=process_len, all_horiz=all_horiz, plotdir=plotdir, - delayed=delayed, plot=plot, min_snr=min_snr, parallel=parallel, - num_cores=num_cores, skip_short_chans=skip_short_chans, - **kwargs) - for template, event, process_len in zip(templates, catalog, - process_lengths): - t = Template() - for tr in template: - if not np.any(tr.data.astype(np.float16)): - Logger.warning('Data are zero in float16, missing data,' - ' will not use: {0}'.format(tr.id)) - template.remove(tr) - if len(template) == 0: - Logger.error('Empty Template') + def _close_queues(self, queues: dict = None): + queues = queues or self._queues + for q_name, q in queues.items(): + if q._closed: continue - t.st = template - t.name = template.sort(['starttime'])[0]. \ - stats.starttime.strftime('%Y_%m_%dt%H_%M_%S') - t.lowcut = lowcut - t.highcut = highcut - t.filt_order = filt_order - t.samp_rate = samp_rate - t.process_length = process_len - t.prepick = prepick - event.comments.append(Comment( - text="eqcorrscan_template_" + t.name, - creation_info=CreationInfo(agency='eqcorrscan', - author=getpass.getuser()))) - t.event = event - self.templates.append(t) - return self + Logger.info(f"Emptying {q_name}") + while True: + try: + q.get_nowait() + except Empty: + break + Logger.info(f"Closing {q_name}") + q.close() + Logger.info("Finished closing queues") + return + + def _on_error(self, error): + """ Gracefully close all child processes and queues and raise error """ + self._close_processes(terminate=True) + self._close_queues() + Logger.info("Admin complete, raising error") + raise error def read_tribe(fname): @@ -1070,7 +1588,7 @@ def read_tribe(fname): doctest.testmod() # List files to be removed after doctest - cleanup = ['test_tribe.tgz'] + cleanup = ['test_tribe.tgz', "test_tribe.pkl"] for f in cleanup: if os.path.isfile(f): os.remove(f) diff --git a/eqcorrscan/core/subspace.py b/eqcorrscan/core/subspace.py index 649f9379d..13986dd01 100644 --- a/eqcorrscan/core/subspace.py +++ b/eqcorrscan/core/subspace.py @@ -14,7 +14,6 @@ """ import numpy as np import logging -import time import h5py import getpass import eqcorrscan @@ -410,7 +409,7 @@ def read(self, filename): self.name = f['data'].attrs['name'].decode('ascii') return self - def plot(self, stachans='all', size=(10, 7), show=True): + def plot(self, stachans='all', size=(10, 7), show=True, *args, **kwargs): """ Plot the output basis vectors for the detector at the given dimension. @@ -435,7 +434,7 @@ def plot(self, stachans='all', size=(10, 7), show=True): for example. 
""" return subspace_detector_plot(detector=self, stachans=stachans, - size=size, show=show) + size=size, show=show, *args, **kwargs) def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, @@ -684,7 +683,6 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate, :return: List of delays :rtype: list """ - from multiprocessing import Pool, cpu_count processed_streams = [] if not stachans: input_stachans = list(set([(tr.stats.station, tr.stats.channel) @@ -701,33 +699,27 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate, raise IOError( 'All channels of all streams must be the same length') for st in streams: - if not parallel: - processed_stream = Stream() - for stachan in input_stachans: - dummy, tr = _internal_process( - st=st, lowcut=lowcut, highcut=highcut, - filt_order=filt_order, sampling_rate=sampling_rate, - first_length=first_length, stachan=stachan) + processed = pre_processing.multi_process( + st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, + samp_rate=sampling_rate, seisan_chan_names=False) + # Add in empty channels if needed and sort + processed_stream = Stream() + for stachan in input_stachans: + tr = processed.select(station=stachan[0], channel=stachan[1]) + if len(tr) == 0: + tr = Trace(np.zeros(int(first_length * sampling_rate))) + tr.stats.station = stachan[0] + tr.stats.channel = stachan[1] + tr.stats.sampling_rate = sampling_rate + tr.stats.starttime = st[0].stats.starttime + # Do this to make more sensible plots + Logger.warning('Padding stream with zero trace for ' + + 'station ' + stachan[0] + '.' + stachan[1]) processed_stream += tr - processed_streams.append(processed_stream) - else: - pool = Pool(processes=min(cores, cpu_count())) - results = [pool.apply_async( - _internal_process, (st,), - {'lowcut': lowcut, 'highcut': highcut, - 'filt_order': filt_order, 'sampling_rate': sampling_rate, - 'first_length': first_length, 'stachan': stachan, - 'i': i}) for i, stachan in enumerate(input_stachans)] - pool.close() - try: - processed_stream = [p.get() for p in results] - except KeyboardInterrupt as e: # pragma: no cover - pool.terminate() - raise e - pool.join() - processed_stream.sort(key=lambda tup: tup[0]) - processed_stream = Stream([p[1] for p in processed_stream]) - processed_streams.append(processed_stream) + processed_stream += tr + assert [(tr.stats.station, tr.stats.channel) + for tr in processed_stream] == input_stachans + processed_streams.append(processed_stream) if no_missed and multiplex: for tr in processed_stream: if np.count_nonzero(tr.data) == 0: @@ -769,30 +761,6 @@ def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate, return output_streams, input_stachans -def _internal_process(st, lowcut, highcut, filt_order, sampling_rate, - first_length, stachan, i=0): - tr = st.select(station=stachan[0], channel=stachan[1]) - if len(tr) == 0: - tr = Trace(np.zeros(int(first_length * sampling_rate))) - tr.stats.station = stachan[0] - tr.stats.channel = stachan[1] - tr.stats.sampling_rate = sampling_rate - tr.stats.starttime = st[0].stats.starttime # Do this to make more - # sensible plots - Logger.warning('Padding stream with zero trace for ' + - 'station ' + stachan[0] + '.' 
+ stachan[1]) - elif len(tr) == 1: - tr = tr[0] - tr.detrend('simple') - tr = pre_processing.process( - tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order, - samp_rate=sampling_rate, seisan_chan_names=False) - else: - raise IOError('Multiple channels for {0}.{1} in a single design ' - 'stream.'.format(stachan[0], stachan[1])) - return i, tr - - def read_detector(filename): """ Read detector from a filename. @@ -833,8 +801,8 @@ def multi(stream): """ stack = stream[0].data for tr in stream[1:]: - stack = np.dstack(np.array([stack, tr.data])) - multiplex = stack.reshape(stack.size, ) + stack = np.vstack((stack, tr.data)) + multiplex = stack.T.reshape(stack.size, ) return multiplex diff --git a/eqcorrscan/core/template_gen.py b/eqcorrscan/core/template_gen.py index b431d0ea3..7f361497c 100644 --- a/eqcorrscan/core/template_gen.py +++ b/eqcorrscan/core/template_gen.py @@ -17,6 +17,8 @@ GNU Lesser General Public License, Version 3 (https://www.gnu.org/copyleft/lesser.html) """ +import warnings + import numpy as np import logging import os @@ -27,7 +29,7 @@ from eqcorrscan.utils.sac_util import sactoevent from eqcorrscan.utils import pre_processing -from eqcorrscan.core import EQcorrscanDeprecationWarning +# from eqcorrscan.core import EQcorrscanDeprecationWarning Logger = logging.getLogger(__name__) @@ -52,10 +54,11 @@ def __str__(self): def template_gen(method, lowcut, highcut, samp_rate, filt_order, length, prepick, swin="all", process_len=86400, - all_horiz=False, delayed=True, plot=False, plotdir=None, - return_event=False, min_snr=None, parallel=False, - num_cores=False, save_progress=False, skip_short_chans=False, - **kwargs): + all_vert=False, all_horiz=False, delayed=True, plot=False, + plotdir=None, return_event=False, min_snr=None, + parallel=False, num_cores=False, save_progress=False, + skip_short_chans=False, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2'], **kwargs): """ Generate processed and cut waveforms for use as templates. @@ -82,6 +85,10 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, :func:`eqcorrscan.core.template_gen.template_gen` :type process_len: int :param process_len: Length of data in seconds to download and process. + :type all_vert: bool + :param all_vert: + To use all channels defined in vertical_chans for P-arrivals even if + there is only a pick on one of them. Defaults to False. :type all_horiz: bool :param all_horiz: To use both horizontal channels even if there is only a pick on one of @@ -119,18 +126,26 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, Whether to ignore channels that have insufficient length data or not. Useful when the quality of data is not known, e.g. when downloading old, possibly triggered data from a datacentre + :type vertical_chans: list + :param vertical_chans: + List of channel endings on which P-picks are accepted. + :type horizontal_chans: list + :param horizontal_chans: + List of channel endings for horizontal channels, on which S-picks are + accepted. :returns: List of :class:`obspy.core.stream.Stream` Templates :rtype: list .. note:: By convention templates are generated with P-phases on the - vertical channel and S-phases on the horizontal channels, normal - seismograph naming conventions are assumed, where Z denotes vertical - and N, E, R, T, 1 and 2 denote horizontal channels, either oriented - or not. To this end we will **only** use Z channels if they have a - P-pick, and will use one or other horizontal channels **only** if - there is an S-pick on it. 
+ vertical channel [can be multiple, e.g., Z (vertical) and H + (hydrophone) for an ocean bottom seismometer] and S-phases on the + horizontal channels. By default, normal seismograph naming conventions + are assumed, where Z denotes vertical and N, E, 1 and 2 denote + horizontal channels, either oriented or not. To this end we will + **only** use vertical channels if they have a P-pick, and will use one + or other horizontal channels **only** if there is an S-pick on it. .. warning:: If there is no phase_hint included in picks, and swin=all, all channels @@ -338,17 +353,13 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, if len(st) == 0: Logger.info("No data") continue + kwargs = dict( + st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, samp_rate=samp_rate, + parallel=parallel, num_cores=num_cores, daylong=daylong) if daylong: - st = pre_processing.dayproc( - st=st, lowcut=lowcut, highcut=highcut, - filt_order=filt_order, samp_rate=samp_rate, - parallel=parallel, starttime=UTCDateTime(starttime), - num_cores=num_cores) - else: - st = pre_processing.shortproc( - st=st, lowcut=lowcut, highcut=highcut, - filt_order=filt_order, parallel=parallel, - samp_rate=samp_rate, num_cores=num_cores) + kwargs.update(dict(starttime=UTCDateTime(starttime))) + st = pre_processing.multi_process(**kwargs) data_start = min([tr.stats.starttime for tr in st]) data_end = max([tr.stats.endtime for tr in st]) @@ -388,8 +399,9 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, # Cut and extract the templates template = _template_gen( event.picks, st, length, swin, prepick=prepick, plot=plot, - all_horiz=all_horiz, delayed=delayed, min_snr=min_snr, - plotdir=plotdir) + all_vert=all_vert, all_horiz=all_horiz, delayed=delayed, + min_snr=min_snr, vertical_chans=vertical_chans, + horizontal_chans=horizontal_chans, plotdir=plotdir) process_lengths.append(len(st[0].data) / samp_rate) temp_list.append(template) catalog_out += event @@ -471,7 +483,7 @@ def extract_from_stack(stack, template, length, pre_pick, pre_pad, # Process the data if necessary if not pre_processed: - new_template = pre_processing.shortproc( + new_template = pre_processing.multi_process( st=new_template, lowcut=lowcut, highcut=highcut, filt_order=filt_order, samp_rate=samp_rate) # Loop through the stack and trim! @@ -560,7 +572,7 @@ def _download_from_client(client, client_type, catalog, data_pad, process_len, "percent of the desired length, will not use".format( tr.stats.station, tr.stats.channel, (tr.stats.endtime - tr.stats.starttime) / 3600)) - elif not pre_processing._check_daylong(tr): + elif not pre_processing._check_daylong(tr.data): Logger.warning( "Data are mostly zeros, removing trace: {0}".format(tr.id)) else: @@ -582,9 +594,10 @@ def _rms(array): return np.sqrt(np.mean(np.square(array))) -def _template_gen(picks, st, length, swin='all', prepick=0.05, +def _template_gen(picks, st, length, swin='all', prepick=0.05, all_vert=False, all_horiz=False, delayed=True, plot=False, min_snr=None, - plotdir=None): + plotdir=None, vertical_chans=['Z'], + horizontal_chans=['E', 'N', '1', '2']): """ Master function to generate a multiplexed template for a single event. @@ -607,6 +620,10 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, :param prepick: Length in seconds to extract before the pick time default is 0.05 seconds. + :type all_vert: bool + :param all_vert: + To use all channels defined in vertical_chans for P-arrivals even if + there is only a pick on one of them. 
Defaults to False. :type all_horiz: bool :param all_horiz: To use both horizontal channels even if there is only a pick on one @@ -630,18 +647,26 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, :param plotdir: The path to save plots to. If `plotdir=None` (default) then the figure will be shown on screen. + :type vertical_chans: list + :param vertical_chans: + List of channel endings on which P-picks are accepted. + :type horizontal_chans: list + :param horizontal_chans: + List of channel endings for horizontal channels, on which S-picks are + accepted. :returns: Newly cut template. :rtype: :class:`obspy.core.stream.Stream` .. note:: By convention templates are generated with P-phases on the - vertical channel and S-phases on the horizontal channels, normal - seismograph naming conventions are assumed, where Z denotes vertical - and N, E, R, T, 1 and 2 denote horizontal channels, either oriented - or not. To this end we will **only** use Z channels if they have a - P-pick, and will use one or other horizontal channels **only** if - there is an S-pick on it. + vertical channel [can be multiple, e.g., Z (vertical) and H + (hydrophone) for an ocean bottom seismometer] and S-phases on the + horizontal channels. By default, normal seismograph naming conventions + are assumed, where Z denotes vertical and N, E, 1 and 2 denote + horizontal channels, either oriented or not. To this end we will + **only** use vertical channels if they have a P-pick, and will use one + or other horizontal channels **only** if there is an S-pick on it. .. note:: swin argument: Setting to `P` will return only data for channels @@ -690,10 +715,14 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, for tr in st: # Check that the data can be represented by float16, and check they # are not all zeros - if np.all(tr.data.astype(np.float16) == 0): - Logger.error("Trace is all zeros at float16 level, either gain or " - "check. Not using in template: {0}".format(tr)) - continue + # Catch RuntimeWarning for overflow in casting + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + if np.all(tr.data.astype(np.float16) == 0): + Logger.error( + "Trace is all zeros at float16 level, either gain or " + f"check. 
Not using in template: {tr}") + continue st_copy += tr st = st_copy if len(st) == 0: @@ -735,13 +764,18 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, continue starttime.update({'picks': s_pick}) elif _swin == 'all': - if all_horiz and tr.stats.channel[-1] in ['1', '2', '3', - 'N', 'E']: + if all_vert and tr.stats.channel[-1] in vertical_chans: + # Get all picks on vertical channels + channel_pick = [ + pick for pick in station_picks + if pick.waveform_id.channel_code[-1] in + vertical_chans] + elif all_horiz and tr.stats.channel[-1] in horizontal_chans: # Get all picks on horizontal channels channel_pick = [ pick for pick in station_picks if pick.waveform_id.channel_code[-1] in - ['1', '2', '3', 'N', 'E']] + horizontal_chans] else: channel_pick = [ pick for pick in station_picks @@ -751,8 +785,11 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, starttime.update({'picks': channel_pick}) elif _swin == 'P': p_pick = [pick for pick in station_picks - if pick.phase_hint.upper()[0] == 'P' and - pick.waveform_id.channel_code == tr.stats.channel] + if pick.phase_hint.upper()[0] == 'P'] + if not all_vert: + p_pick = [pick for pick in p_pick + if pick.waveform_id.channel_code == + tr.stats.channel] if len(p_pick) == 0: Logger.debug( f"No picks with phase_hint P " @@ -760,7 +797,7 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, continue starttime.update({'picks': p_pick}) elif _swin == 'S': - if tr.stats.channel[-1] in ['Z', 'U']: + if tr.stats.channel[-1] in vertical_chans: continue s_pick = [pick for pick in station_picks if pick.phase_hint.upper()[0] == 'S'] @@ -864,6 +901,7 @@ def _group_events(catalog, process_len, template_length, data_pad): :rtype: list """ # case for catalog only containing one event + assert len(catalog), "No events to group" if len(catalog) == 1: return [catalog] sub_catalogs = [] diff --git a/eqcorrscan/doc/RCET_logo_transparent.png b/eqcorrscan/doc/RCET_logo_transparent.png new file mode 100644 index 000000000..1358c8430 Binary files /dev/null and b/eqcorrscan/doc/RCET_logo_transparent.png differ diff --git a/eqcorrscan/doc/requirements.txt b/eqcorrscan/doc/requirements.txt index 312a9f530..22a047738 100644 --- a/eqcorrscan/doc/requirements.txt +++ b/eqcorrscan/doc/requirements.txt @@ -1,4 +1,5 @@ matplotlib +boto3 mock # Note that mock is replaced by unittest.mock now, and we should update the test code to use this nbsphinx diff --git a/eqcorrscan/doc/submodules/utils.correlate.rst b/eqcorrscan/doc/submodules/utils.correlate.rst index dd05c82c9..7b12a3905 100644 --- a/eqcorrscan/doc/submodules/utils.correlate.rst +++ b/eqcorrscan/doc/submodules/utils.correlate.rst @@ -143,13 +143,11 @@ for example: >>> # do correlation using a custom function >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs): ... # Just to keep example short call other xcorr function - ... print('calling custom xcorr function') ... return numpy_normxcorr(templates, stream, pads, *args, **kwargs) >>> detections = match_filter( ... ['1'], [template], stream, .5, 'absolute', 1, False, ... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS - calling custom xcorr function... You can also use the set_xcorr object (eqcorrscan.utils.correlate.set_xcorr) @@ -162,14 +160,12 @@ or within the scope of a context manager: >>> with set_xcorr(custom_normxcorr): ... detections = match_filter(['1'], [template], stream, .5, ... 'absolute', 1, False) # doctest:+ELLIPSIS - calling custom xcorr function... 
>>> # permanently set the xcorr function (until the python kernel restarts) >>> set_xcorr(custom_normxcorr) default changed to custom_normxcorr >>> detections = match_filter(['1'], [template], stream, .5, 'absolute', ... 1, False) # doctest:+ELLIPSIS - calling custom xcorr function... >>> set_xcorr.revert() # change it back to the previous state diff --git a/eqcorrscan/doc/submodules/utils.pre_processing.rst b/eqcorrscan/doc/submodules/utils.pre_processing.rst index 7fa90319a..eac1120d2 100644 --- a/eqcorrscan/doc/submodules/utils.pre_processing.rst +++ b/eqcorrscan/doc/submodules/utils.pre_processing.rst @@ -17,8 +17,6 @@ be padded after filtering if you decide not to use these routines (see notes in :toctree: autogen :nosignatures: - dayproc - process - shortproc + multi_process .. comment to end block diff --git a/eqcorrscan/doc/tutorial.rst b/eqcorrscan/doc/tutorial.rst index 9c07affb3..c1a52555f 100644 --- a/eqcorrscan/doc/tutorial.rst +++ b/eqcorrscan/doc/tutorial.rst @@ -79,10 +79,9 @@ you want to see in the tutorials please let us know on github. :titlesonly: tutorials/quick_start.ipynb - tutorials/matched-filter.rst + tutorials/matched-filter.ipynb tutorials/template-creation.rst tutorials/subspace.rst - tutorials/lag-calc.rst tutorials/mag-calc.rst tutorials/clustering.rst diff --git a/eqcorrscan/doc/tutorials/.gitignore b/eqcorrscan/doc/tutorials/.gitignore new file mode 100644 index 000000000..97f77c09b --- /dev/null +++ b/eqcorrscan/doc/tutorials/.gitignore @@ -0,0 +1 @@ +tutorial_waveforms diff --git a/eqcorrscan/doc/tutorials/lag-calc.rst b/eqcorrscan/doc/tutorials/lag-calc.rst deleted file mode 100644 index f38d102d3..000000000 --- a/eqcorrscan/doc/tutorials/lag-calc.rst +++ /dev/null @@ -1,20 +0,0 @@ -Lag-time and pick correction -============================ - -The following is a work-in-progress tutorial for lag-calc functionality. - -An important note ------------------ -Picks generated by lag-calc are relative to the start of the template waveform, -for example, if you generated your templates with a pre_pick of 0.2, you -should expect picks to occur 0.2 seconds before the actual phase arrival. -The result of this is that origin-times will be shifted by the same amount. - -If you have applied different pre_picks to different channels when generating -template (currently not supported by any EQcorrscan functions), then picks -generated here will not give the correct location. - -Advanced Example: Parkfield 2004 --------------------------------- - -.. literalinclude:: ../../tutorials/lag_calc.py diff --git a/eqcorrscan/doc/tutorials/matched-filter.ipynb b/eqcorrscan/doc/tutorials/matched-filter.ipynb new file mode 100644 index 000000000..987c12188 --- /dev/null +++ b/eqcorrscan/doc/tutorials/matched-filter.ipynb @@ -0,0 +1,766 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Matched-filters\n", + "\n", + "This notebook will provide a look at using EQcorrscan's Tribe objects for matched-filter detection of earthquakes.\n", + "\n", + "This notebook extends on the ideas covered in the [Quick Start](quick_start.ipynb) notebook. In particular this\n", + "notebook also covers:\n", + "1. Concurrent execution of detection workflows for more efficient compute utilisation with large datasets;\n", + "2. Use of local waveform databases using [obsplus](https://github.com/niosh-mining/obsplus);\n", + "3. Cross-correlation pick-correction using the `lag_calc` method." 
+ ], + "metadata": { + "collapsed": false + }, + "id": "96a39ad3defaeedc" + }, + { + "cell_type": "markdown", + "source": [ + "## Set up\n", + "\n", + "We are going to focus in this notebook on using local data. For examples of how to directly use data from online providers\n", + "see the [Quick Start](quick_start.ipynb) notebook. \n", + "\n", + "To start off we will download some data - in your case this is likely data that you have either downloaded from one or more\n", + "online providers, or data that you have collected yourself. At the moment we don't care how those data are organised, as long\n", + "as you have continuous data on disk somewhere. We will use [obsplus](https://github.com/niosh-mining/obsplus) to work out\n", + "where data are and provide us with the data that we need when we need it.\n", + "\n", + "Obsplus is great and has more functionality than we expose here - if you make use of obsplus, please cite the \n", + "paper by [Chambers et al., (2021)](https://joss.theoj.org/papers/10.21105/joss.02696).\n", + "\n", + "As in the [Quick Start](quick_start.ipynb) example, we will control the output level from EQcorrscan using logging." + ], + "metadata": { + "collapsed": false + }, + "id": "7c244daa3adde5ea" + }, + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "logging.basicConfig(\n", + " level=logging.WARNING,\n", + " format=\"%(asctime)s\\t%(name)s\\t%(levelname)s\\t%(message)s\")\n", + "\n", + "Logger = logging.getLogger(\"TutorialLogger\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:18:30.192025542Z", + "start_time": "2023-12-01T00:18:30.187031966Z" + } + }, + "id": "afb90fba397b3674" + }, + { + "cell_type": "markdown", + "source": [ + "We will use the March 2023 Kawarau swarm as our case-study for this. This was an energetic swarm that\n", + "was reported by New Zealand's GeoNet monitoring agency and discussed in a news article [here](https://www.geonet.org.nz/response/VJW80CGEPtq0JPCBHlNaR).\n", + "\n", + "We will use data from ten stations over a duration of two days. The swarm lasted longer than this, but\n", + "we need to limit compute resources for this tutorial! Feel free to change the end-date below to run\n", + "for longer. To be kind to GeoNet and not repeatedly get data from their FDSN service we are going to get data from the AWS open data bucket.
If you don't already have boto3 installed you will need to install that for this sections (`conda install boto3` or `pip install boto3`).\n", + "\n", + "NB: If you actually want to access the GeoNet data bucket using Python, a drop-in replacement from FDSN clients exists [here](https://github.com/calum-chamberlain/cjc-utilities/blob/main/src/cjc_utilities/get_data/geonet_aws_client.py)" + ], + "metadata": { + "collapsed": false + }, + "id": "b4baffba897550b8" + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "def get_geonet_data(starttime, endtime, stations, outdir):\n", + " import os\n", + " import boto3\n", + " from botocore import UNSIGNED\n", + " from botocore.config import Config\n", + " \n", + " GEONET_AWS = \"geonet-open-data\"\n", + " \n", + " DAY_STRUCT = \"waveforms/miniseed/{date.year}/{date.year}.{date.julday:03d}\"\n", + " CHAN_STRUCT = (\"{station}.{network}/{date.year}.{date.julday:03d}.\"\n", + " \"{station}.{location}-{channel}.{network}.D\")\n", + " if not os.path.isdir(outdir):\n", + " os.makedirs(outdir)\n", + " \n", + " bob = boto3.resource('s3', config=Config(signature_version=UNSIGNED))\n", + " s3 = bob.Bucket(GEONET_AWS)\n", + " \n", + " date = starttime\n", + " while date < endtime:\n", + " day_path = DAY_STRUCT.format(date=date)\n", + " for station in stations:\n", + " for instrument in \"HE\":\n", + " for component in \"ZNE12\":\n", + " channel = f\"{instrument}H{component}\"\n", + " chan_path = CHAN_STRUCT.format(\n", + " station=station, network=\"NZ\",\n", + " date=date, location=\"10\", channel=channel)\n", + " local_path = os.path.join(outdir, chan_path)\n", + " if os.path.isfile(local_path):\n", + " Logger.info(f\"Skipping {local_path}: exists\")\n", + " continue\n", + " os.makedirs(os.path.dirname(local_path), exist_ok=True)\n", + " remote = \"/\".join([day_path, chan_path])\n", + " Logger.debug(f\"Downloading from {remote}\")\n", + " try:\n", + " s3.download_file(remote, local_path)\n", + " except Exception as e:\n", + " Logger.debug(f\"Could not download {remote} due to {e}\")\n", + " continue\n", + " Logger.info(f\"Downloaded {remote}\")\n", + " date += 86400" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:18:33.554168095Z", + "start_time": "2023-12-01T00:18:33.546942071Z" + } + }, + "id": "a5e81f234705ab54" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "from obspy import UTCDateTime\n", + "\n", + "starttime, endtime = UTCDateTime(2023, 3, 17), UTCDateTime(2023, 3, 19)\n", + "stations = ['EDRZ', 'LIRZ', 'MARZ', 'MKRZ', 'OMRZ', 'OPRZ', 'TARZ', 'WKHS', 'HNCZ', 'KARZ']\n", + "\n", + "outdir = \"tutorial_waveforms\"\n", + "\n", + "get_geonet_data(starttime=starttime, endtime=endtime, stations=stations, outdir=outdir)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:19:54.148732360Z", + "start_time": "2023-12-01T00:18:34.938266088Z" + } + }, + "id": "a4182117cbf6692c" + }, + { + "cell_type": "markdown", + "source": [ + "Great, now we have some data. EQcorrscan is well set up to use clients for data access,\n", + "using clients allows EQcorrscan to request the data that it needs and take care of \n", + "overlapping chunks of data to ensure that no data are missed: network-based\n", + "matched-filters apply a delay-and-stack step to the correlations from individual\n", + "channels. This increases the signal-to-noise ratio of the correlation sum. 
However,\n", + "because of the delay part, the stacks made at start and end of chunks of waveform\n", + "data do not use the full network. To get around this *you should overlap your data*.\n", + "\n", + "If you use client-based access to data, EQcorrscan will take care of this for you.\n", + "\n", + "So how do you use clients for local data? Make a local database using obsplus.\n", + "\n", + "If you don't have obsplus installed you should install it now (`conda install obsplus`\n", + "or `pip install obsplus`)." + ], + "metadata": { + "collapsed": false + }, + "id": "864c0532837b9fc" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "data": { + "text/plain": " network station location channel starttime \\\n0 NZ EDRZ 10 EHE 2023-03-17 00:00:03.528394 \n1 NZ EDRZ 10 EHN 2023-03-17 00:00:05.458394 \n2 NZ EDRZ 10 EHZ 2023-03-17 00:00:03.528394 \n3 NZ KARZ 10 EHE 2023-03-17 00:00:02.963130 \n4 NZ KARZ 10 EHN 2023-03-17 00:00:00.093130 \n5 NZ KARZ 10 EHZ 2023-03-17 00:00:05.823130 \n6 NZ LIRZ 10 EHE 2023-03-17 00:00:01.753132 \n7 NZ LIRZ 10 EHN 2023-03-17 00:00:02.913132 \n8 NZ LIRZ 10 EHZ 2023-03-17 00:00:01.463132 \n9 NZ MARZ 10 EHE 2023-03-17 00:00:01.553130 \n10 NZ MARZ 10 EHN 2023-03-17 00:00:01.683130 \n11 NZ MARZ 10 EHZ 2023-03-17 00:00:00.963130 \n12 NZ MKRZ 10 EHE 2023-03-17 00:00:01.673129 \n13 NZ MKRZ 10 EHN 2023-03-17 00:00:00.143129 \n14 NZ MKRZ 10 EHZ 2023-03-17 00:00:00.053129 \n15 NZ OMRZ 10 EHE 2023-03-17 00:00:02.740000 \n16 NZ OMRZ 10 EHN 2023-03-17 00:00:00.580000 \n17 NZ OMRZ 10 EHZ 2023-03-17 00:00:04.110000 \n18 NZ OPRZ 10 HHE 2023-03-17 00:00:02.993132 \n19 NZ OPRZ 10 HHN 2023-03-17 00:00:03.473132 \n20 NZ OPRZ 10 HHZ 2023-03-17 00:00:01.963132 \n21 NZ TARZ 10 EHE 2023-03-17 00:00:01.850000 \n22 NZ TARZ 10 EHN 2023-03-17 00:00:00.760000 \n23 NZ TARZ 10 EHZ 2023-03-17 00:00:00.630000 \n\n endtime \n0 2023-03-19 00:00:00.098393 \n1 2023-03-19 00:00:04.518393 \n2 2023-03-19 00:00:03.588393 \n3 2023-03-19 00:00:01.273126 \n4 2023-03-19 00:00:00.303126 \n5 2023-03-19 00:00:03.653126 \n6 2023-03-19 00:00:03.523130 \n7 2023-03-19 00:00:04.253130 \n8 2023-03-19 00:00:00.313130 \n9 2023-03-19 00:00:01.593131 \n10 2023-03-19 00:00:04.163131 \n11 2023-03-19 00:00:05.063131 \n12 2023-03-19 00:00:01.763133 \n13 2023-03-19 00:00:02.463133 \n14 2023-03-19 00:00:02.363133 \n15 2023-03-19 00:00:02.470000 \n16 2023-03-19 00:00:00.550000 \n17 2023-03-19 00:00:03.820000 \n18 2023-03-19 00:00:01.243131 \n19 2023-03-19 00:00:04.443131 \n20 2023-03-19 00:00:00.143131 \n21 2023-03-19 00:00:01.580000 \n22 2023-03-19 00:00:00.820000 \n23 2023-03-19 00:00:03.830000 ", + "text/html": "
" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from obsplus import WaveBank\n", + "\n", + "bank = WaveBank(outdir)\n", + "\n", + "bank.get_availability_df()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:19:56.010304625Z", + "start_time": "2023-12-01T00:19:54.151398148Z" + } + }, + "id": "422d39dd855950a6" + }, + { + "cell_type": "markdown", + "source": [ + "Obsplus has now scanned the waveforms that we just downloaded and made a table\n", + "of what is there. Great. These `WaveBank` objects have a similar api to obspy\n", + "`Client` objects, so we can use them as a drop-in replacement.\n", + "\n", + "Now we are nearly ready to make some templates.\n", + "\n", + "## Template creation\n", + "\n", + "To make templates you need two things:\n", + "1. Continuous waveform data;\n", + "2. A catalogue of events with picks.\n", + "\n", + "We already have (1). For (2), the catalogue of events, we could use GeoNet picked\n", + "events, however if you have events with picks locally and want to use those\n", + "events as templates you should save those events in a format readable by obspy.\n", + "You can then skip ahead to read those picks back in.\n", + "\n", + "In the worst case scenario you have times that you know that you want your\n", + "template to start at, but they are not in any standard format readable by obspy,\n", + "you can construct events from scratch as below. Note in this example I am just\n", + "populating the picks as this is all we need. You do need to be careful about\n", + "the `waveform_id`: this should match the seed id of the continuous data\n", + "exactly, otherwise the picks will not be used." + ], + "metadata": { + "collapsed": false + }, + "id": "9f4dd2504fb18202" + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "from obspy.core.event import (\n", + " Catalog, Event, Pick, WaveformStreamID)\n", + "from obspy import UTCDateTime\n", + " \n", + "# Make the picks for the event:\n", + "picks = [\n", + " Pick(\n", + " time=UTCDateTime(2023, 3, 18, 7, 46, 15, 593125),\n", + "\t waveform_id=WaveformStreamID(\n", + " network_code='NZ', station_code='MARZ', \n", + " channel_code='EHZ', location_code='10'),\n", + " phase_hint='P'),\n", + " Pick(\n", + " time=UTCDateTime(2023, 3, 18, 7, 46, 17, 633115),\n", + "\t waveform_id=WaveformStreamID(\n", + " network_code='NZ', station_code='MKRZ', \n", + " channel_code='EHZ', location_code='10'),\n", + " phase_hint='P'),\n", + " Pick(\n", + " time=UTCDateTime(2023, 3, 18, 7, 46, 18, 110000),\n", + "\t waveform_id=WaveformStreamID(\n", + " network_code='NZ', station_code='OMRZ', \n", + " channel_code='EHZ', location_code='10'),\n", + " phase_hint='P'),\n", + "] \n", + "# Add as many picks as you have - you might want to loop \n", + "# and/or make a function to pasre your picks to obspy Picks.\n", + "\n", + "# Make the event\n", + "event = Event(picks=picks)\n", + "# Make the catalog\n", + "catalog = Catalog([event])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:19:56.010572545Z", + "start_time": "2023-12-01T00:19:56.004559606Z" + } + }, + "id": "cf480f12b889f227" + }, + { + "cell_type": "markdown", + "source": [ + "For this example we are going to use a catalogue of events picked by GeoNet - we will download those data and write them to disk to mimic you using local files:" + ], + "metadata": { + "collapsed": false + }, + "id": "d482d389c5260ca6" + }, + 
{ + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "from obspy.clients.fdsn import Client\n", + "\n", + "client = Client(\"GEONET\")\n", + "\n", + "cat = client.get_events(\n", + " starttime=UTCDateTime(2023, 3, 17),\n", + " endtime=UTCDateTime(2023, 3, 19),\n", + " latitude=-38.05, longitude=176.73, \n", + " maxradius=0.5, minmagnitude=3.0) # Limited set of relevent events\n", + "\n", + "cat.write(\"tutorial_catalog.xml\", format=\"QUAKEML\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:20:11.091149707Z", + "start_time": "2023-12-01T00:19:56.007304525Z" + } + }, + "id": "70ec5a6c46c54884" + }, + { + "cell_type": "markdown", + "source": [ + "## Template creation with local files\n", + "\n", + "Now that we have the events and waveforms we need, we can make our Tribe of templates.\n", + "\n", + "First we have to read in the events that we want to use as templates:" + ], + "metadata": { + "collapsed": false + }, + "id": "2d8590e8fd52ed0c" + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "49 Event(s) in Catalog:\n", + "2023-03-17T14:29:34.921582Z | -38.067, +176.689 | 3.40 MLv | manual\n", + "2023-03-17T14:56:17.215087Z | -38.061, +176.679 | 3.07 MLv | manual\n", + "...\n", + "2023-03-18T20:20:52.842474Z | -38.045, +176.734 | 3.17 MLv | manual\n", + "2023-03-18T21:42:39.943071Z | -38.051, +176.735 | 4.25 MLv | manual\n", + "To see all events call 'print(CatalogObject.__str__(print_all=True))'\n" + ] + } + ], + "source": [ + "from obspy import read_events\n", + "\n", + "cat = read_events(\"tutorial_catalog.xml\")\n", + "print(cat)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:20:40.878853704Z", + "start_time": "2023-12-01T00:20:28.064446183Z" + } + }, + "id": "bfce77794a5d15f8" + }, + { + "cell_type": "markdown", + "source": [ + "### Pick curation\n", + "\n", + "You may want to limit what picks you actually use for your templates. Any picks that you provide will\n", + "be used for cutting waveforms - this may include amplitude picks! You should not need to restrict\n", + "what stations you have picks for, but it doesn't do any harm to.\n", + "\n", + "Below we select picks from the stations that we set earlier, and only P and S picks. We also limit\n", + "to only one P and one S pick per station - you may not want to do that, but it can get messy if you\n", + "have multiple picks of the same phase." + ], + "metadata": { + "collapsed": false + }, + "id": "d119b8c9c37581f5" + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matched-filter GPU is not compiled! Should be here: /home/chambeca/my_programs/Building/fast_matched_filter/fast_matched_filter/lib/matched_filter_GPU.so\n" + ] + } + ], + "source": [ + "from eqcorrscan.utils.catalog_utils import filter_picks\n", + "\n", + "cat = filter_picks(\n", + " cat, \n", + " stations=stations, \n", + " phase_hints=[\"P\", \"S\"], \n", + " enforce_single_pick=\"earliest\") " + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:20:45.250392572Z", + "start_time": "2023-12-01T00:20:42.068040579Z" + } + }, + "id": "7d9ac720ccf4aa2f" + }, + { + "cell_type": "markdown", + "source": [ + "### Template creation decisions\n", + "\n", + "We now have everything needed to create a tribe of templates. 
At this point you\n", + "have to make some decisions about parameters:\n", + "1. What filters to use;\n", + "2. What sampling-rate to use;\n", + "3. How long your template waveforms should be;\n", + "4. When to start your template waveforms relative to your pick times;\n", + "5. Whether to use separate P and S windows or not.\n", + "\n", + "Your choices for 2 and 3 should be based somewhat on your choice of what filters \n", + "to use (1). There is little point in using a sampling-rate significantly above\n", + "2.5x your high-cut frequency (2.5x because of the roll-offs used in the\n", + "Butterworth filters used by EQcorrscan). Lower sampling-rates will result in\n", + "fewer correlations and hence faster compute time, but most of the time in EQcorrscan's\n", + "matched-filter runs is spent in the pre-processing of the data rather than the\n", + "actual correlation computation.\n", + "\n", + "When deciding on filtering parameters you may find it helpful to look at what\n", + "frequencies have the best signal-to-noise ratio. There are functions in the\n", + "eqcorrscan.utils.plotting module to help with this (a rough manual check is also sketched below).\n", + "\n", + "We will use some relatively standard, but un-tuned parameters for this example.\n", + "You should spend some time deciding: these decisions strongly affect the quality\n", + "of your results. You can also set the minimum signal-to-noise ratio for traces\n", + "to be included in your templates. Again, this should be tested.\n", + "\n", + "The `process_len` parameter controls how much data will be processed at once.\n", + "Because EQcorrscan computes resampling in the frequency domain, and can compute\n", + "correlations in the frequency domain, changing this length between construction \n", + "and detection affects the accuracy of the Fourier transforms which affects the\n", + "final correlations. For this reason the `process_len` is maintained throughout\n", + "the workflow by EQcorrscan. Here we use one hour (3600 seconds), but it is common\n", + "to use one day (86400 seconds).\n", + "\n", + "You will note that we use the `from_client` method for construction: this is\n", + "because we have a `WaveBank` that emulates a client, making this really simple."
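A rough, do-it-yourself check of which band gives the best signal-to-noise ratio is sketched below. This is a minimal illustrative sketch rather than part of the workflow above: the candidate bands and the 3 s signal/noise windows are arbitrary choices, the pick time is the MARZ P-pick defined earlier, and it assumes the `bank` WaveBank behaves like a client for `get_waveforms` requests.

.. code-block:: python

    import numpy as np
    from obspy import UTCDateTime


    def band_snr(tr, pick_time, freqmin, freqmax, window=3.0):
        """Crude SNR: RMS in a window after the pick over RMS in a window before it."""
        # Work on a copy so the original trace is untouched
        filtered = tr.copy().detrend("linear").taper(0.05)
        filtered.filter("bandpass", freqmin=freqmin, freqmax=freqmax,
                        corners=4, zerophase=True)
        signal = filtered.slice(pick_time, pick_time + window).data
        noise = filtered.slice(pick_time - window, pick_time).data

        def _rms(data):
            return float(np.sqrt(np.mean(np.square(data.astype(np.float64)))))

        return _rms(signal) / _rms(noise)


    # MARZ P-pick used when constructing the example event above
    pick_time = UTCDateTime(2023, 3, 18, 7, 46, 15, 593125)
    st = bank.get_waveforms(
        network="NZ", station="MARZ", location="10", channel="EHZ",
        starttime=pick_time - 60, endtime=pick_time + 60)
    for band in [(1.0, 10.0), (2.0, 15.0), (5.0, 20.0)]:
        print(f"{band}: SNR {band_snr(st[0], pick_time, *band):.1f}")

Bands that keep a high SNR across several stations are reasonable starting points for the `lowcut` and `highcut` used in the construct call below.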
+ ], + "metadata": { + "collapsed": false + }, + "id": "df11e06b7dce0cce" + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-12-01 13:20:51,205\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.876741406237065 below threshold for KARZ.EHZ, not using\n", + "2023-12-01 13:20:51,206\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.KARZ.10.EHZ\n", + "2023-12-01 13:20:51,208\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.9540223699044974 below threshold for OMRZ.EHZ, not using\n", + "2023-12-01 13:20:51,209\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n", + "2023-12-01 13:20:51,274\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.5737322242199878 below threshold for LIRZ.EHZ, not using\n", + "2023-12-01 13:20:51,275\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n", + "2023-12-01 13:20:51,277\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.8545790599562736 below threshold for MKRZ.EHN, not using\n", + "2023-12-01 13:20:51,278\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MKRZ.10.EHN\n", + "2023-12-01 13:20:51,280\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.5224417059935123 below threshold for OMRZ.EHZ, not using\n", + "2023-12-01 13:20:51,280\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n", + "2023-12-01 13:20:51,283\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.9704676413162124 below threshold for TARZ.EHZ, not using\n", + "2023-12-01 13:20:51,283\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n", + "2023-12-01 13:20:51,315\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.6693293207326403 below threshold for LIRZ.EHZ, not using\n", + "2023-12-01 13:20:51,316\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n", + "2023-12-01 13:20:51,318\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.241518566731996 below threshold for OMRZ.EHZ, not using\n", + "2023-12-01 13:20:51,319\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n", + "2023-12-01 13:20:51,322\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.657470431167782 below threshold for TARZ.EHZ, not using\n", + "2023-12-01 13:20:51,322\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n", + "2023-12-01 13:20:52,446\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.5290305215816998 below threshold for LIRZ.EHZ, not using\n", + "2023-12-01 13:20:52,446\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n", + "2023-12-01 13:20:52,448\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.0144665058782114 below threshold for TARZ.EHZ, not using\n", + "2023-12-01 13:20:52,448\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.TARZ.10.EHZ\n", + "2023-12-01 13:20:53,107\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.400855581315436 below threshold for LIRZ.EHZ, not using\n", + "2023-12-01 13:20:53,108\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHZ\n", + "2023-12-01 13:20:53,109\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.3176851693587057 below threshold for MKRZ.EHZ, not using\n", + "2023-12-01 13:20:53,110\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MKRZ.10.EHZ\n", + "2023-12-01 13:20:53,112\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 
1.3894705992621912 below threshold for OMRZ.EHE, not using\n", + "2023-12-01 13:20:53,112\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHE\n", + "2023-12-01 13:20:53,114\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.884291218472121 below threshold for OPRZ.HHZ, not using\n", + "2023-12-01 13:20:53,115\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHZ\n", + "2023-12-01 13:20:53,173\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.1825458574844174 below threshold for MARZ.EHN, not using\n", + "2023-12-01 13:20:53,174\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MARZ.10.EHN\n", + "2023-12-01 13:20:53,177\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 3.851222353660108 below threshold for OPRZ.HHN, not using\n", + "2023-12-01 13:20:53,178\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHN\n", + "2023-12-01 13:20:53,697\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.4135717198623312 below threshold for KARZ.EHZ, not using\n", + "2023-12-01 13:20:53,697\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.KARZ.10.EHZ\n", + "2023-12-01 13:20:53,699\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 2.3877145548728342 below threshold for LIRZ.EHE, not using\n", + "2023-12-01 13:20:53,699\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.LIRZ.10.EHE\n", + "2023-12-01 13:20:53,701\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.1370695659576338 below threshold for MARZ.EHZ, not using\n", + "2023-12-01 13:20:53,701\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.MARZ.10.EHZ\n", + "2023-12-01 13:20:53,703\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 0.22436151966111806 below threshold for OMRZ.EHZ, not using\n", + "2023-12-01 13:20:53,704\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OMRZ.10.EHZ\n", + "2023-12-01 13:20:53,705\teqcorrscan.core.template_gen\tWARNING\tSignal-to-noise ratio 1.487347592301636 below threshold for OPRZ.HHZ, not using\n", + "2023-12-01 13:20:53,706\teqcorrscan.core.template_gen\tWARNING\tNo pick for NZ.OPRZ.10.HHZ\n", + "2023-12-01 13:20:57,350\teqcorrscan.core.match_filter.tribe\tERROR\tEmpty Template\n", + "2023-12-01 13:20:57,352\teqcorrscan.core.match_filter.tribe\tERROR\tEmpty Template\n" + ] + } + ], + "source": [ + "from eqcorrscan import Tribe\n", + "\n", + "tribe = Tribe().construct(\n", + " method=\"from_client\",\n", + " client_id=bank,\n", + " catalog=cat,\n", + " lowcut=2.0,\n", + " highcut=15.0,\n", + " samp_rate=50.0,\n", + " filt_order=4,\n", + " length=3.0,\n", + " prepick=0.5,\n", + " swin=\"all\",\n", + " process_len=3600,\n", + " all_horix=True,\n", + " min_snr=4.0,\n", + " parallel=True\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:20:57.362410907Z", + "start_time": "2023-12-01T00:20:49.826135441Z" + } + }, + "id": "fb993ce01cf4377a" + }, + { + "cell_type": "markdown", + "source": [ + "You should see an ERROR message about empty templates: some of the events in our catalog\n", + "do not have useful data in our wavebank. 
We might want to set a minimum number of stations\n", + "used for our templates to ensure that our templates are all of reasonable quality.\n", + "In this case we will only retain templates with at least five stations:" + ], + "metadata": { + "collapsed": false + }, + "id": "474f94eb5efa59d5" + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tribe of 33 templates\n" + ] + } + ], + "source": [ + "tribe.templates = [t for t in tribe if len({tr.stats.station for tr in t.st}) >= 5]\n", + "print(tribe)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:20:59.634894692Z", + "start_time": "2023-12-01T00:20:59.629687355Z" + } + }, + "id": "aa7ce144ced8823a" + }, + { + "cell_type": "markdown", + "source": [ + "### Matched-filter detection\n", + "\n", + "Now that we have our tribe we can use it to detect new earthquakes. Again we\n", + "will make use of our local `WaveBank`. This is preferred to feeding one stream\n", + "of data to the code at a time for two reasons:\n", + "1. EQcorrscan will overlap your chunks of data (in this case every hour of data)\n", + " to ensure that all of the data have correlations from all channels after the\n", + " delay-and-stack correlation sums.\n", + "2. EQcorrscan can pre-emptively process the next chunk's data in parallel while\n", + " computing detections in the current chunk. This can significantly speed up\n", + " processing, and makes better use of compute resources.\n", + "\n", + "\n", + "The main decisions that you have to make at this stage are around thresholds.\n", + "Generally it is better to start with a relatively low threshold: you can increase\n", + "the threshold later using the `Party.rethreshold` method, but you can't lower\n", + "it without re-running the whole detection workflow.\n", + "\n", + "It is common to use `MAD` thresholding, but you should experiment with your\n", + "dataset to see what works best." + ], + "metadata": { + "collapsed": false + }, + "id": "231442eea4647717" + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-12-01 13:21:02,031\teqcorrscan.core.match_filter.tribe\tWARNING\tUsing concurrent_processing=True can be faster ifdownloading your data takes a long time. 
See https://github.com/eqcorrscan/EQcorrscan/pull/544for benchmarks.\n", + "2023-12-01 13:22:27,562\teqcorrscan.core.match_filter.helpers.tribe\tWARNING\tRemoved data for NZ.EDRZ.10.EHE NZ.EDRZ.10.EHN NZ.EDRZ.10.EHZ NZ.KARZ.10.EHE NZ.KARZ.10.EHN NZ.KARZ.10.EHZ NZ.LIRZ.10.EHE NZ.LIRZ.10.EHN NZ.LIRZ.10.EHZ NZ.MARZ.10.EHE NZ.MARZ.10.EHN NZ.MARZ.10.EHZ NZ.MKRZ.10.EHE NZ.MKRZ.10.EHN NZ.MKRZ.10.EHZ NZ.OMRZ.10.EHE NZ.OMRZ.10.EHN NZ.OMRZ.10.EHZ NZ.OPRZ.10.HHE NZ.OPRZ.10.HHN NZ.OPRZ.10.HHZ NZ.TARZ.10.EHN NZ.TARZ.10.EHZ due to less than 80% of the required length.\n", + "2023-12-01 13:22:27,563\teqcorrscan.core.match_filter.tribe\tWARNING\tNo suitable data between 2023-03-18T23:50:43.199856Z and 2023-03-19T00:51:03.199856Z, skipping\n" + ] + } + ], + "source": [ + "party = tribe.client_detect(\n", + " client=bank,\n", + " starttime=starttime,\n", + " endtime=endtime,\n", + " threshold=10.0,\n", + " threshold_type=\"MAD\",\n", + " trig_int=1.0,\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:22:27.605964306Z", + "start_time": "2023-12-01T00:21:02.016248197Z" + } + }, + "id": "b7f7332ffa3ffde3" + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "125\n" + ] + } + ], + "source": [ + "print(len(party))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:42:38.678673425Z", + "start_time": "2023-12-01T00:42:38.668416131Z" + } + }, + "id": "45dd26bad62d8e17" + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAroAAAG3CAYAAABbi/ZaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAxjklEQVR4nO3debRkZXXw/++muwEZFWkmUQEZImAasVEBkYaOiFxewKWIERGUN20EMcbEN/DqT4ivqIlDMMSpFWRwQAQMkDYx0DIILsUGIUwxgEwNCA2KjALd7N8f59ymbnHv7TtU1Tl16vtZq1ZVnao6tW/trq5dT+3zPJGZSJIkSU2zWtUBSJIkSd1goStJkqRGstCVJElSI1noSpIkqZEsdCVJktRIFrqSJElqpJlVB6CJ2XDDDXOLLbaoOgxJkqR6ufpqroYHM3N2+00Wun1iiy22YMmSJVWHIUmSVC8RBNw52k22LkiSJKk3hoYgorOncVjoSpIkqTd+9KOePp2tC33qkUce4bHHHqs6jL42Y8YM1l13XdZaa62qQ5EkqXmGhsYubDM7+1xjjOxa6Papxx57jE022YTVVnNQfioyk2eeeYYHH3zQQleSpG4Yq8jdb7+ehWCh28cscqcuIlh99dWrDkOSpObr9OjtJFgpaZW23nprzjrrrJXX582bx/LlyznhhBO4+OKLR9z3tNNO49lnn+3o84/2PMPuuOMOfvKTn3T0+SRJUjNY6Gpc1113HXvssQcXXnjhhO7fjUJ3PBa6kiRpLBa6Gtd5553HUUcdxRNPPMFTTz017n2vuuoqrr32WubPn8+ZZ57Jrbfeyj777MOee+7Jpz71KQCOOOIIPvzhD7P77rtz4okn8sEPfpDXvOY1nHrqqStvP+qoo3jjG9/IJz7xiRH7v/fee9lrr714wxvewFFHHQXAwoULOfPMM5k/fz4An/zkJ5k3bx577703d9xxB7feeiu77bYbe+21F5/+9Kc7/fJIkqQas9Dtd12eg+6aa65hl112Yd999x2zfWDYa1/7WnbaaScWL17MYYcdxsc+9jFOOeUULrvsMm688UaWLl0KwJ577smVV17JGWecwZFHHsnPfvYzTjnllJX72X333bn88su55ppruOeee1Zu33DDDbnooou44ooreOSRR7jllltYsGABhx12GIsXL+b666/nnnvu4dJLL+XLX/4yn/nMZ7j00ktZsGABl1xyCccdd9w0X2xJkhqoG3PbrmJ+217xYLQOiIhTgf2BBzJzx3Lb54D/BTwN3Aa8NzMfLm87DjgSWAF8KDN/XEXcq3Lbbbdxww03sO+++/LUU0+x7bbbMjQ0NOHH//rXv+awww4D4OGHH15ZtO64444AbLrppuy4447MmjWLaHlDvPrVrwbgVa96FbfffvvK7Q899BAf+MAHePjhh7njjju49957RzzfzTffzKWXXsq8efNW7v8d73gHJ5xwAoceeijvfve7ectb3jL5F0KSpCbr5ty2PZxhYTQWup1xGvAvwBkt2y4CjsvM5RHxD8BxwN9FxPbAO4EdgM2AiyNi28xcMaVn7uKRjOeeey7f/OY3V7YFHHDAAaxYMX6Ys2bNYsWKFcycOZPtttuOk046iU033ZQVK1YQEXz1q18dUdTGKN/4rrvuOrbffntuuOEGjj766JXbv/vd73LQQQdxxBFHcOihh5KZK58PYLvttmOfffbh5JNPBuCZZ55h+fLlfPGLX+Tpp59m9913t9CVJGksFc6O0C22LnRAZl4O/K5t239m5vLy6s+BzcvLBwJnZeZTmXk7cCvw2p4FOwmLFi1it912W3l9++2354orrhj3MUNDQxx00EGce+65nHjiibzvfe9j7733Zr/99uOJJ56Y0PNedtll7LHHHsyZM4fNN9985fa9996bL3zhCxx00EE8/vjjQDE6fOWVV3LIIYcwZ84cNtlkE+bNm8dee+3Ft771LS644AL22GMPdt11V971rndN4VWQJKmPTKUNocEiG1i9VyEitgD+bbh1oe22C4HvZ+a3I+JfgJ9n5rfL204B/j0zzxnlcQuABQAve9nLXnPnnXeuvO3ee+9ls80268rfUqUjjjiCj3/842y99dY9eb6mvo6SpAE11cJ1v/1g0aLOxtJDEXF1Zs5t327rQpdFxMeA5cB3hjeNcrdRv21k5kJgIcDcuXP9RiJJkibGgUzAQrerIuJwioPU5udzQ+dLgZe23G1z4N72xw6q0047reoQJEl1MDTU3YOkNBDs0e2SiNgX+DvggMxsbU69AHhnRKwREVsC2wBXTeU5bDuZHl8/Saoxi9ypq3imgzpxRLcDIuJ7wDxgw4hYChxPMcvCGsBF5cwCP8/Mv8zMGyPibOAmipaGo6cy48KMGTN45plnWH311Tv1ZwyUzOSxxx5j1qxZVYciSRqPgxKaBg9G6xNz587NJUuWrLz+xBNP8PDDD1cXUAPMmjWLDTbYgBkzZlQdiiSp3fBBVdYpmgAPRmuYtdZai7XWWqvqMCRJkmrLHl1JkiQ1koWuJEmqj+EFD6QOsNCVJEn10TrbgrMHaJrs0ZUkSfXjQWjqAEd0JUlS7w23KLSfpA6y0JUkSb033oIQtiyoQ2xdkCRJ1bFFQV3kiK4kSeoeWxRUIQtdSZLUPbYoqEK2LkiSpO6zRUEVcERXkiR1xmhtClKFLHQlSVJnjNWmYIuCKmLrgiRJ6izbFFQTjuhKkqSxjTVrgjMpqA9Y6EqSpLGNN2vCaGxTUI3YuiBJklbNdgT1IUd0JUkaJJNpRbAdQX3OQleSpEEy2VYEsB1BfcvWBUmSBpGtCBoAjuhKktRk7a0K0gCx0JUkqclGa1WwFUEDwtYFSZIGga0KGkCO6EqSJKmRLHQlSZLUSBa6kiRJaiQLXUmS+t14i0BIA8xCV5KkfreqRSCcZUEDylkXJElqCmdWkEZwRFeSpH42NFR1BFJtWehKktTPhtsWbE+QnsdCV5KkftU6mrtoUXVxSDVloStJUr9yNFcal4WuJEn9ztFcaVQWupIkddt489xO5yRpXBa6kiR126rmuZ0O2xakMTmPriRJveI8t1JPOaIrSZKkRrLQlSRJUiNZ6EqSJKmRLHQlSeqk0WZYkFQJC11JkjpprBkWnB1B6jlnXZAkqRucYUGqnCO6kiRNR3urgqTasNCVJGk6RmtVsE1BqgVbFyRJ6gRbFaTacURXkiRJjWShK0mSpEay0JUkSVIjWehKkjQVw7MtSKotC90OiIhTI+KBiLihZdsGEXFRRNxSnr+o5bbjIuLWiPh1RLy5mqglSdPSOtuCsyxItWSh2xmnAfu2bTsWWJyZ2wCLy+tExPbAO4Edysd8JSJm9C5USVJHZcKiRVVHIWkUFrodkJmXA79r23wgcHp5+XTgoJbtZ2XmU5l5O3Ar8NpexClJkjRILHS7Z+PMvA+gPN+o3P4S4O6W+y0ttz1PRCyIiCURsWTZsmVdDVaSJKlpLHR7b7QjF0adZTwzF2bm3MycO3v27C6HJUmS1CwW
ut1zf0RsClCeP1BuXwq8tOV+mwP39jg2SZKkxrPQ7Z4LgMPLy4cD57dsf2dErBERWwLbAFdVEJ8kSVKjzaw6gCaIiO8B84ANI2IpcDzwWeDsiDgSuAs4GCAzb4yIs4GbgOXA0Zm5opLAJUmSGsxCtwMy88/HuGn+GPc/ETixexFJkrpmaGjkHLqSasvWBUmSJsOFIqS+4YiuJElTkaNOmCOpRhzRlSSp1dAQRIx9ktQ3LHQlSWo1kf5bWxakvmDrgiRJo7E1Qep7juhKkiSpkSx0JUmS1EgWupIkSWokC11JUv9Y1YwInThJagwLXUlS/+jVimTOqiA1grMuSJL6jzMiSJoAR3QlSZLUSBa6kiRJaiQLXUmSJDWSha4kqX7Gml1BkibBQleSVD/jza7gjAiSJshZFyRJ9eXsCpKmwRFdSVK9DA1VHYGkhrDQlSTVy3Dbgi0KkqbJQleSNL5eLLs72gFnixZV9zdLagQLXUnS+Hq17G4rR3MldYAHo0mSJsYDwyT1GUd0JUmS1EgWupIkSWokC11JkiQ1koWuJGlszmkrqY9Z6EqSxuactpL6mIWuJGnVnNNWUh+y0JWkQTWRhSAkqY9Z6ErSoJroQhC2LUjqUy4YIUmDzoUgJDWUI7qS1I8m0nZgW4KkAWehK0n9aKJtB6tiW4KkBrN1QZL6mW0HkjQmR3Qlqd+4iIMkTYiFriT1GxdxkKQJsdCVpH7lIg6SNC4LXUmqu/YZFiRJE2KhK0l1N9oMC7YtSNIqOeuCJPULZ1iQpElxRFeSqjLRRR8kSVNioStJVZnMog+2KkjSpNm6IElVsyVBkrrCEV1J6oXR2hQkSV1loStJvTBWm4ItCZLUNbYuSFIv2aYgST3jiK4kddNwy4IkqecsdCWpm1pbFmxTkKSesnVBknrBlgVJ6jlHdLssIv46Im6MiBsi4nsRsWZEbBARF0XELeX5i6qOU1IXDA1VHYEkDTQL3RYR8Y8RsV5EzIqIxRHxYES8exr7ewnwIWBuZu4IzADeCRwLLM7MbYDF5XVJTTPctmDLgiRVwkJ3pH0y8xFgf2ApsC3w0WnucybwgoiYCawF3AscCJxe3n46cNA0n0NSnS1aVHUEkjSQLHRHmlWe7wd8LzN/N52dZeY9wOeBu4D7gD9k5n8CG2fmfeV97gM2ms7zSKoh2xYkqXIWuiNdGBH/DcwFFkfEbOCPU91Z2Xt7ILAlsBmw9mRaISJiQUQsiYgly5Ytm2oYkqpg24IkVc5Ct0VmHgvsStFT+wzwOEWhOlV/BtyemcvK/Z0H7AbcHxGbApTnD4wRz8LMnJuZc2fPnj2NMCRVxrYFSaqM04s93yuBLcqe2mFnTHFfdwGvj4i1gCeB+cASigL6cOCz5fn5Uw9XkiRJo7HQbRERZwKvAK4FVpSbkykWupn5i4g4B7gGWA78ClgIrAOcHRFHUhTDB08vckmSJLWz0B1pLrB9Zudmds/M44Hj2zY/RTG6K0mSpC6xR3ekG4BNqg5CUp9zxgVJqgVHdEfaELgpIq6iGHUFIDMPqC4kSX3HGRckqRYsdEc6oeoAJDWIMy5IUqUsdFtk5mURsTGwS7npqswcdeovSZIk1Zs9ui0i4h3AVRSzILwD+EVEvL3aqCRJkjQVjuiO9DFgl+FR3HJltIuBcyqNSpIkSZPmiO5Iq7W1KjyEr5GkiRoagoiqo5AklRzRHek/IuLHwPfK64cAP6owHkn95Ect/10444IkVc5Ct0VmfjQi3gbsDgSwMDN/WHFYkupmaGhkUduuc2vOSJKmwUK3TWaeC5xbdRySamy8IteRXEmqDQtdICKuyMw3RMSjQOtQTACZmetVFJqkOnPkVpJqzQOtgMx8Q3m+bmau13Ja1yJXGkDDB5WNdZIk9QUL3RYRceZEtklquPFaE4bZoiBJtWfrwkg7tF6JiJnAayqKRVLVbE2QpL7miC4QEceV/bl/GhGPlKdHgfuB8ysOT1IvDQ1VHYEkqUMsdIHM/Exmrgt8rq0/98WZeVzV8UnqoeG2BVsTJKnvWeiOdFVErD98JSJeGBEHVRiPpKosWlR1BJKkabLQHen4zPzD8JXMfBg4vrpwJEmSNFUWuiON9np4wJ4kSVIfstAdaUlEfDEiXhERW0XEPwFXVx2UJEmSJs9Cd6RjgKeB7wNnA08CR1cakaTeGF4kQpLUGP4s3yIzHweOjYh1MvOxquOR1EOti0Q444IkNYIjui0iYreIuAm4qbw+JyK+UnFYknop0xkXJKkhLHRH+ifgzcBDAJl5HfDGSiOS1H0uEiFJjWSh2yYz727btKKSQCT1jotESFIj2aM70t0RsRuQEbE68CHg5opjktQrtixIUqM4ojvSX1LMsvASYCmwE3BUlQFJ6qDhmRXaT5KkRnJEd6TtMvPQ1g0RsTtwZUXxSOqk1pkV2tm2IEmNY6E70snAzhPYJqmfZVYdgSSpByx0gYjYFdgNmB0RH2m5aT1gRjVRSZq0oaHxR20lSQPFQrewOrAOxeuxbsv2R4C3VxKRpMmbSJFri4IkDQwLXSAzLwMui4jTMvPOiFi7XCVNUj+yNUGShLMutNusXBntZnBlNKkvtM6kIElSCwvdkU7CldGk/tLermBrgiSpZOtCm8y8O0aODLkymtQPbFeQJLVxRHekESujRcTf4spoUjXGWtzBxR4kSRNkoTvSaCujHV1lQNLAmsw0YbYrSJJGYetCi8x8EDh0lXeU1Du2JEiSpshCF4iIk4ExP00z80M9DEcaXC74IEnqIFsXCkuAq4E1KZb7vaU87YQHo0m94wwKkqQOckQXyMzTASLiCGCvzHymvP414D8rDE0aTLYrSJI6wBHdkTZj5BLA65TbJHXSWDMqSJLUQY7ojvRZ4FcRcUl5fU/ghOrCkRpqvD5c2xUkSR1iodsiM78VEf8OvK7cdGxm/rbKmKRGs0VBktRFti60yczfZub55ckiV+qU1nYFSZJ6wEJXUm84o4IkqcdsXZDUW7YrSJJ6xBHdNhHxhoh4b3l5dkRsWXVMUi2MNVPCRE+SJPWYhW6LiDge+DvguHLTLODb1UUk1UgnViyzXUGS1EO2Loz0VuDVwDUAmXlvRKw7/kOkAWPrgSSpTziiO9LTmZlAAkTE2tPdYUS8MCLOiYj/joibI2LXiNggIi6KiFvK8xdNO3KpG5wpQZLUxyx0Rzo7Ir4OvDAi/gK4GPjGNPf5JeA/MvNPgDnAzcCxwOLM3AZYXF6X6seZEiRJfczWhRaZ+fmIeBPwCLAd8InMvGiq+4uI9YA3AkeU+38aeDoiDgTmlXc7HbiUojdYqifbFSRJfchCt0VE/DXwg+kUt222ApYB34qIOcDVwF8BG2fmfQCZeV9EbNSh55M6Z2io6ggkSZoWWxdGWg/4cUT8NCKOjoiNp7m/mcDOwFcz89XA40yiTSEiFkTEkohYsmzZsmmGIk3ScNuC7QqSpD5lodsiM/8+M3cAjgY2Ay6LiIunsculwNLM/EV5/RyKwvf+iNgUoDx/YIx4Fmbm3MycO3v27GmEIU3DokV
VRyBJ0pRY6I7uAeC3wEPAlNsKMvO3wN0RsV25aT5wE3ABcHi57XDg/KmHKkmSpNHYo9siIj4AHALMphh9/YvMvGmauz0G+E5ErA78BngvxReMsyPiSOAu4OBpPockSZLaWOiO9HLgw5l5bad2WO5r7ig3ze/Uc0gd54FokqQGsNClmAYsMx8B/rG8vkHr7Zn5u0oCk6rigWiSpAaw0C18F9ifYvqvBFqXgUqKacKkweOBaJKkPmahC2Tm/uX5llXHIlVqaOj5q6FJktSnnHWhRUQsnsg2qbFai1zbFiRJfc4RXSAi1gTWAjaMiBfxXOvCehTz6UqDxSV/JUkN4Ihu4f0U/bl/Up4Pn84HvlxhXFL3DQ1BRHGSJKlBHNEFMvNLwJci4pjMPLnqeKSeau/JtWVBktQQFrotMvPkiNgR2B5Ys2X7GdVFJfWI7QqSpIax0G0REccD8ygK3R8BbwGuACx0JUmS+ow9uiO9nWLFst9m5nuBOcAa1YYkSZKkqbDQHenJzHwWWB4R6wEP4GIRkiRJfcnWhZGWRMQLgW9QzLrwGHBVpRFJkiRpSix0W2TmUeXFr0XEfwDrZeZ/VRmTJEmSpsZCF4iInce7LTOv6WU8kiRJmj4L3cIXxrktgb17FYgkSZI6w0IXyMy9qo5BkiRJnWWh2yIi3jPadheMkCRJ6j8WuiPt0nJ5TYo5da/BBSPURENDz1/+V5KkBrHQbZGZx7Rej4j1gTMrCkfqrtYid7/9qotDkqQusdAd3xPANlUHIXVVZtURSJLUFRa6LSLiQopZFqBYNW574OzqIpI6wBYFSdKAstAd6fMtl5cDd2bm0qqCkTpivCLXlgVJUoNZ6LbIzMsAImI9ytcmIjbIzN9VGpjUCbYoSJIGjIVui4hYAPw/4EngWSAoWhm2qjIuSZIkTZ6F7kgfBXbIzAerDkSSJEnTs1rVAdTMbRQzLUiSJKnPOaI70nHAzyLiF8BTwxsz80PVhSRNgTMtSJJkodvm68BPgOspenSl/tRe5Dq7giRpAFnojrQ8Mz9SdRBSxzjTgiRpgNmjO9IlEbEgIjaNiA2GT1UHJUmSpMlzRHekd5Xnx7Vsc3oxSZKkPmSh2yIzt6w6BkmSJHWGhW6LiHjPaNsz84xexyJNibMtSJK0koXuSLu0XF4TmA9cA1joqj+0FrnOtCBJGnAWui0y85jW6xGxPnBmReFIU+dsC5IkOevCKjwBbFN1ENKEDA1VHYEkSbXiiG6LiLiQYpYFKL4EbA+cXV1E0iQMty3YsiBJEmCh2+7zLZeXA3dm5tKqgpGmZNGiqiOQJKkWLHSBiNga2DgzL2vbvkdErJGZt1UUmjQxti1IkvQ89ugWTgIeHWX7k+VtUr3ZtiBJ0vNY6Ba2yMz/at+YmUuALXofjjRFti1IkrSShW5hzXFue0HPopAma2gIIqqOQpKkWrLQLfwyIv6ifWNEHAlcXUE80sS4QIQkSWPyYLTCh4EfRsShPFfYzgVWB95aVVDSSqta2tcFIiRJeh4LXSAz7wd2i4i9gB3LzYsy8ycVhiU9Z7wi15FcSZJGZaHbIjMvAS6pOg5pTI7cSpI0YfboSnXnHLmSJE2Jha5Ud86RK0nSlFjoSv3COXIlSZoUC11JkiQ1koVul0XEjIj4VUT8W3l9g4i4KCJuKc9fVHWMkiRJTWSh231/Bdzccv1YYHFmbgMsLq9LkiSpwyx0uygiNgeGgG+2bD4QOL28fDpwUI/DUj9xxgVJkqbMQre7TgL+D/Bsy7aNM/M+gPJ8o7EeHBELImJJRCxZtmxZVwNVTTnjgiRJU2ah2yURsT/wQGZevco7jyEzF2bm3MycO3v27A5Gp77jjAuSJE2ahW737A4cEBF3AGcBe0fEt4H7I2JTgPL8gepCVC0NDUFEcZIkSVNmodslmXlcZm6emVsA7wR+kpnvBi4ADi/vdjhwfkUhqq6G2xWG2bYgSdKUzKw6gAH0WeDsiDgSuAs4uOJ4VFeZVUcgSVJfs9Dtgcy8FLi0vPwQML/KeFRjzrIgSVLH2Log1YmzLEiS1DEWulIdOcuCJEnTZqEr9VLrjAqjnSRJUsdY6Eq91D6jwmhsW5AkqSM8GE2qgjMqSJLUdY7oSpIkqZEsdCVJktRIFrqSJElqJAtdqdtaZ1qQJEk9Y6ErdVv7TAvOqiBJUk8464LUK860IElSTzmiK0mSpEay0JUkSVIjWehKkiSpkSx0pW5wpgVJkipnoSt1gzMtSJJUOWddkLrJmRYkSaqMI7rSVLS2Jox2kiRJlbPQlaaivTVhNLYrSJJUKVsXpOmwNUGSpNpyRFeSJEmNZKErSZKkRrLQlSRJUiNZ6EqrMtoMC5IkqfYsdKVVGWuGBWdVkCSp1px1QZooZ1iQJKmvOKIrjaa1XUGSJPUlC11pNO3tCrYpSJLUd2xdkMZju4IkSX3LEV2p3dBQ1RFIkqQOsNCV2g23LdiuIElSX7PQlcayaFHVEUiSpGmw0JUkSVIjWehKkiSpkSx0pfYlfiVJUiNY6EqjLfHrgWiSJPU959GVhjlnriRJjeKIrgaTS/xKktR4FroaTC7xK0lS49m6oMFmu4IkSY3liK4Gh+0KkiQNFAtdDQ7bFSRJGii2Lmjw2K4gSdJAcERXzdS+CITtCpIkDRwLXTXTaItAgO0KkiQNEFsX1Gy2KUiSNLAc0ZUkSVIjWehKkiSpkSx0JUmS1EgWumqW4dkWJEnSwLPQ7aKIeGlEXBIRN0fEjRHxV+X2DSLiooi4pTx/UdWxNkbrbAvOsCBJ0kCz0O2u5cDfZOYrgdcDR0fE9sCxwOLM3AZYXF5XJ2XCokVVRyFJkipkodtFmXlfZl5TXn4UuBl4CXAgcHp5t9OBgyoJsGmGhqqOQJIk1YiFbo9ExBbAq4FfABtn5n1QFMPARmM8ZkFELImIJcuWLetZrH1ruG3BlgVJkoSFbk9ExDrAucCHM/ORiT4uMxdm5tzMnDt79uzuBdg0tixIkiQsdLsuImZRFLnfyczzys33R8Sm5e2bAg9UFV/fG55lwZkWJElSGwvdLoqIAE4Bbs7ML7bcdAFweHn5cOD8XsfWGK2zLIBtC5IkaaWZVQfQcLsDhwHXR8S15bb/C3wWODsijgTuAg6uJrwGyaw6AkmSVDMWul2UmVcAY/2mPr+XsUiSJA0aWxckSZLUSBa6kiRJaiQLXfUvF4iQJEnjsNBV/3KBCEmSNA4LXfU/F4iQJEmjsNCVJElSI1noSpIkqZEsdCVJktRIFrrqP0NDEGOtwyFJklSw0FX/GZ5tAZxxQZIkjcklgNW/MquOQJIk1ZgjuuovLhIhSZImyEJX/cVFIiRJ0gRZ6Ko/uUiEJElaBQtd9Q/bFiRJ0iRY6Kp/2LYgSZImwUJX/ce2BUmSNAEWupIkSWokC11JkiQ1koWu+oMHokmSpEmy0FV/8EA0SZI0SRa66i8eiCZJkibIQlf1Z9uCJEmaAgtd1Z9tC5IkaQosdNU/bFuQJEmTYKGr6gwNQcSqT5IkSVNgoavqDLckTIRtC5IkaZJmVh
2ARGbVEUiSpAZyRFe91dquIEmS1EUWuuqt9nYFWxIkSVKX2LqgatiuIEmSuswRXUmSJDWSha4kSZIayUJXkiRJjWShq94ZGqo6AkmSNEAsdNU7wzMuONOCJEnqAQtd9d6iRVVHIEmSBoCFrnrDtgVJktRjFrrqDdsWJElSj1noqrdsW5AkST1ioavuGhqCiKqjkCRJA8hCV9013LIAti1IkqSemll1ABoQmVVHIEmSBowjupIkSWokC11JkiQ1koWuJEmSGslCV8/NjNCNkyRJUkUsdDVyZoRucLYFSZJUAWdd0HOcGUGSJDWII7oViYh9I+LXEXFrRBy7ygdcfbXtBZIkSZNgoVuBiJgBfBl4C7A98OcRsX2lQdleIEmSGsbWhWq8Frg1M38DEBFnAQcCN437KFsLJEmSJswR3Wq8BLi75frScpskSZI6xBHdaozWGPu84dqIWAAsKK8+FhG/HuVxGwIPTiOW9YE/TOPxndhHU2KoOhdNeR2nu4/p5qETMdThdaj68VD9e6IT+2hKDOaiHn+D/z91L4aXj3qvzPTU4xOwK/DjluvHAcdNcV9LphnLwg78PdPaR4NiqDQXDXodpxvDtPLQoNehDn+D/z/VJ4aBz0VN/gb/f+pxDLYuVOOXwDYRsWVErA68E7igolgurME+mhLDdNXhb2hCDJ3QhNeh6sd3QhNex7rEMF1NeB3q8Dd0QhNeh57FEGVVrB6LiP2Ak4AZwKmZeeIU97MkM+d2MjZNjbmoB/NQH+aiPsxFPZiH3rNHtyKZ+SOgE0uSLezAPtQZ5qIezEN9mIv6MBf1YB56zBFdSZIkNZI9un0mwqXM6sA81Ie5qA9zUQ/moT7MRfUsdPtEROwUEfPTIfhKmYf6MBf1YS7qwTzUh7moD3t0a66cleEC4MXAIxHxEuC8zHys2sgGi3moD3NRH+aiHsxDfZiL+nFEtyai8L8i4rsRcWREvLC86QDgpszcBfhbYE+K5YLVBeahPsxFfZiLejAP9WEu+oeFbn28HvhL4HJgPvCJcvtLgM3LyzcAVwJv63l0g8M81Ie5qA9zUQ/moT7MRZ+w0O2hiNg6IjYsL0d5PqO8eVfgwcz8GvAPwPoRMQ+4FHglQGY+A/wU2LqngTeMeagPc1Ef5qIezEN9mItmsNDtgYjYKyJuAc4Gjo+IVww3qGfmioiYBTwB3F4+5Mby8uuApUCWfT4A9wNLI2JOT/+IBjAP9WEu6sNc1IN5qA9z0SwWuh0UEbMi4pCI+D8RsV25bS2KnzU+npk7A78FPlbetkZEHAg8CzwFzIqIF2fmcoo3y4uAx4DfAO8on2bDch8revin9RXzUB/moj7MRT2Yh/owF4PBQrdDImI14MPA+4H1gO+Xb4AngIOBq8q7fg3YJiJ2onj9twBmAfcALwC2K+/3EDA7M58CTgd2i4jDy+d4OjNvGP4pRc8xD/VhLurDXNSDeagPczE4LHQnKSJePNr2zHwWeC9wWGZ+nKIv56jy5luA3cr7PUTxM8d+mflkZn4pM/8IXA/8Dnh3+ZiHgG3LyxcCn6F48z0FnFTua2Dn54uIl4623Tz0XkTMHm27uei9iNg2Il7dvt1c9JafE/XhZ4UsdCcgIt4QEWdExLXAd8v+nVlt93kZxRtly3LTD4ENI2Jn4EfAm1vufhmwQ+vjM/M+4BTgVRFxNnAq8KXytuWZeU1m7p+ZH83M/+78X1l/EbFmRJwUEb8Ezo+IY9o/UMxD70TEpyLiZuDciJjne6I6EfHmiLgaOBd4fUSs2Xa7uegyPyfqw88KtbLQbVP24LwuItZt2bwLcDPwWuA7wOEUR1wSEbMjYlvgj8DjwCblY/4HeBrYCrgI2Ltlf+sDt0Zh7Yh4f0TMLt847wHOAfbPzHNGiW+1Qfj5Y4w8vBlYHdgP+HNgd4o5Cs1DF0XE6lHMF7lbFJOhExHbA5tSzBn5WYrXa6i8bSNz0R1j5GImsD/wucx8VWZ+leJ1JiI2MBed5+dEffhZoVWx0G0RRT/NPcB5FB8cw74DfCEzn6b4Zvc0zzWW70rxBvp9edqq3H4PRR/PjMy8BfjviPir8tviAcDl5c8YawL3lfskM2/PzLMz89bRYszMZ5v+88c4edgR2DYzl1H0Rv0OuLj8oH895qHjImIziv/EjwU+Wp6gOLr4xeVr+hOKkZFDy9t2w1x03Ci5+Nvypp2BdTLzrLKwnZOZz0YxDdIbMBcd5edEffhZoYkYyEI3IraMiG3KyzOiaEoH+AXFt78TKL6VExGrZeYD5X9eAE8Ccyl6dsjMCzLzG1nMl3ctsFNEbFn+w96+5WkXUIyAnQr8EvhV+fiHyn38oWt/cE1NJg+lU4GrIuIu4HxgDvDG8meiC83D1EXEOm3Xh0cg3ghEZu4OfBLYJSLmAj+j/Ckvi361y4BXlu+XfzUXUzfJXLyW4kN3XkS8F7gY+PuI+DjwrP8/Td0oefBzoiKTyUXJzwqtFIPwRSOKPqn9KX7OmEMxmfO3M/ODERGt37bKD5V9gb/MzANbts/IYv68T1F8Qfh4Fs3s7c91MsUa188AGwAfyszbW/fRtT+05jqUh7cB22TmZ6M4Cvafgf8vMy9rey7zsAoRsSXFtDk7U/xsd3pm/nv5of1seZ93AHtm5tHlCOFxFK/p14FLgLdm5h0RsQbFARifyMyftz2PuViFaeRiefleuINiZP1/U4w4XQV8NDP/ve15zMU4JpKH8n5+TnRZB3LhZ4WAho7oRsTMiHhFy6bZwD4U38reB5xBMfLxvKMgy+u/AZ6NcoLn8o21otzna4Az2t5oW0bEfuXVT1A0td8IfHD4DVPue0XZ49PI171dp/NQGqI4upXMvJbidX5d+XyvMA+ja81FFD1jq1H8HPcgxZHBZwInwsqjkYetBTwSERuW/+HfQfGhAHBruQ8opue5A1infA7fE2PocC5ml4+/FPhD+TPpExRfQt5UPofvi1FMIw9+TnRYp3NR8rNCQIMK3YjYqjw/iWJy5ouHb8vMezPzA5l5SmbeDMyjaEJv/Vmw1YMUkz/vMbz78vxNwF3AgxHxkYh4a7n9pRQf9GTm7zPzB5n5j5l5Z/uOs/C8b/hN0eU8QPGf0XvLx7yB4iCBH5W3bYZ5WGmsXJTF0LPAuyj6zm4rH3JBPHeA03A+7qMYIdymvH4vRTH7FPADYL+IeDnFB/t6wE/L0cbNMRcrdTEXLwDWBs4CXhgRu0TEDuW2H5b3831Rmk4eRuHnxDR0ORfgZ4VKjSl0gQsjYm2K3sG9gLsi4jVQfFCUZkbRjH4jsPFYO8pi3ryf8dy0I1Hu+5+AtwD/SvFBfmt5/8sz86zWfYxRuA2CruWhfE2/AVwdEddTzFN4PXBTef+fmocRxsxFaSHw9oi4iaJQWpPn1mSfWZ7fBPyBYiQeilHEPTPzSYpWhVMoeuBOBBZn5lOZucJcPE83c/EoRdvCNyneEwuBu4Gfg++LNlPOQ/kFbiU/J6ata7nws0IjZGZfnChGLg4Evkqx/vSrgNXK2zan+Ie8Vcv9FwLHlZdnt
GzfiuIn89eN81xrUzS3P06xfvUh5fZ3UKx8MtpjourXaADycHC5fXVgg0HOQwdyMXy/v6GYJH318vxXo+RqO4oJ1N9DseLPJ9tuX99c1CYX6w5yLrqch9XansvPifrmws8KTytPtR7RjYiNW765vZdi+qJrgSuAv6acFw94OZDAspaHX0JxlDLlbcPuo/hpabwJnD9H8bP6x4ADMvP7AFlMIbJsuGen9Rtglu+cJqpRHn4AkJlPZ+bvRuudanIeoHO5yGL6qRcDb8vMr5Sv6VeALSJi7SwPwIiIyMxfU/RU707RC/elbDlAIzP/YC5qk4tHBy0XPcxD+8/Xfk60qVEuBv6zQi2qrrRbT8Ba5fk/UMx7dx7F8nwAa7Tcbz2K5vTXlde3oujlad3XVhQ/VazTtn1diqmQ/mSU5/cbnnmo1anbuaA4WOPPysv7UbQivLjqv7uOJ3NRj1PVefD/J3Phqb9OlY/oDn/bjYijgI+UDepzM3MDil6nBRGxY2Y+FcWqQO8HLgc2oliFBorG/z9ExIblvlbLzN9Q9KkNN7wPN7HvU24fnu9wpcxc+Q0vnjvycyCYh/roVS5KHwb+d0TcAPw9cGEW/W6jxmUuzEUV6pQH/38yF+ovM1d9l+krf3LLiHgBMJ9i5aSrM/PccvvqwLMUjeKbUUy2DXAlxUEWb4mIW8o3zu+BYyia1z8ZEV+mmP/uGoqeILL42WP4H/x/RMTlwOeBJRTT8PwkM38/XszZwKMszUN91CAXPwU+k5lnlnl5JjPvHS/m8kOlcT/3mYt66NM8+P+TuVDN9eTbT/nG2ITiDfAXFG+KQyPineXtTwOvBm6jOAr/5ojYuPyH+2uK+VdfUt737Mz8KfAlikJ9N+CFFA3ndwNEMd3RJRTTIJ1M8YZZUj7+oVUVV01lHuqjBrn4dBZzS5KZdw5/iAziiIi5qAfzUB/mQk3SsRHdlm+A6wCHURxotAnFlDffyszfRsSbM/OR8v4nAy+LiBdkMVXR2hRvjFspGta3Au4H7gR2oXhjtFqNYhWTGyjmk1y5/F9m3hkR88pRj4FiHuqjH3PR1FERc1EP5qE+zIUGxbS/HUXE7Ih4U8u+/rQ8nQkcAhwLHFTe9mhEvDQi/h/FP/IbyjcMFD9nb0TRu/MYz70JbqeYBPrGKPp9PhQRC4GLKFYCupri54//ieIoTWDlN9KB6dkxD/VhLurDXNSDeagPc6FBM60R3Yj4JMUb40Fgl4g4naJn5+9avgWeR/FN8YflP+TZFEfcnwMcHhGPUzSqvwC4PjN/HxH/BpweEedQ/DxyNzArMx+LiA0oens+k88t1fd7inWuH2+Nb1C+/ZmH+jAX9WEu6sE81Ie50EDKCUzNAOxMccTj3wAvLbdtAXydYqWS1SgOMvqnlscEMAP4NPCBMfZ7GLCwvHwOsEPLbe+n6Nm5DthrnNgGZnoR81Cfk7moz8lc1ONkHupzMheePD13WuVPBBFxDMWKJbMoVjP5SkQERbP5KzLz1iy+hX0LeFv5mNUyM4EXldt+MMbuN6eYew+Kb5h/2nLbN4D5mTknMy9pi2lgJuAeZh7qw1zUh7moB/NQH+ZCGmkirQv/CZyVxUov6wHfoVjK73pgTkTMyGJt+xsjYlZEvCIzbysf+8/AyZn5IEBEzKKYimQvYA7wOoqVTKBoTl+5fnX5Rlx5lGW2/KQxoG8U81Af5qI+zEU9mIf6MBdSi4k0ff9P+YZZPYsenu0o/t3+kWL5vj9rue/PKObcIyJ2BpYD50fEHhHxrsx8BtiR4hvjbcB7MvOSiFiT4meQc9ufPDOfTft2wDzUibmoD3NRD+ahPsyF1GKVI7qZmRERmfl0RLwL+AXFUZYA/wq8Ffhxef22loe+HXg3sC3FHHz/Vu5nEbCo7Tn+SNEQrzGYh/owF/VhLurBPNSHuZBGion+olD+hHEmsCgzzyy3bQd8HPgv4GGKb32HAI9QvJkeBX5avila9/W8nzY0MeahPsxFfZiLejAP9WEupMJkCt3dgSMz830RsTawbhYTSm8GfI6iV+f7wL+29+P4Jukc81Af5qI+zEU9mIf6MBdSYTKF7hUUK6Espfhp4++Bc7JYCnCsx0T7G0jTYx7qw1zUh7moB/NQH+ZCKkxowYgolgi8k2LFkwsy86pR7rMaRXvQyjeJb5jOMg/1YS7qw1zUg3moD3MhPWfCI7rPe2Ax754/a1TMPNSHuagPc1EP5qE+zIUG1aTWlB7u2wF7d6pkHurDXNSHuagH81Af5kKaxoiuJEmSVGeTGtGVJEmS+oWFriRJkhrJQleSJEmNZKErSQIgIlZExLURcWNEXBcRH2k9oGmMx2xRLjUrSbVjoStJGvZkZu6UmTsAbwL2A45fxWO2ACx0JdWSsy5IkgCIiMcyc52W61sBvwQ2BF4OnEmx2hbABzPzZxHxc+CVFIsTnA78M/BZYB6wBvDlzPx6z/4ISWphoStJAp5f6Jbbfg/8CfAo8Gxm/jEitgG+l5lzI2Ie8LeZuX95/wXARpn5qYhYA7gSODgzb+/l3yJJMMElgCVJAyvK81nAv0TETsAKYNsx7r8P8KcR8fby+vrANhQjvpLUUxa6kqRRla0LK4AHKHp17wfmUBzf8cexHgYck5k/7kmQkjQOD0aTJD1PRMwGvgb8SxY9busD95VLyR4GzCjv+iiwbstDfwx8ICJmlfvZNiLWRpIq4IiuJGnYCyLiWoo2heUUB599sbztK8C5EXEwcAnweLn9v4DlEXEdcBrwJYqZGK6JiACWAQf1JnxJGsmD0SRJktRIti5IkiSpkSx0JUmS1EgWupIkSWokC11JkiQ1koWuJEmSGslCV5IkSY1koStJkqRGstCVJElSI/3/7nJ0CHPZsaIAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "party.plot(plot_grouped=True)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-01T00:42:41.270249073Z", + "start_time": "2023-12-01T00:42:41.038983113Z" + } + }, + "id": "ca178d93cec54650" + }, + { + "cell_type": "markdown", + "source": [ + "## Note on concurrent processing\n", + "\n", + "As of EQcorrscan versions > 0.4.4, detect methods support concurrent processing\n", + "of intermediate steps in the matched-filter process when running multiple\n", + "chunks of data (e.g. 
when `endtime - starttime > process_len`). By default this\n", + "is disabled as it does increase memory use. However, for cases when downloading\n", + "data from a client is a major bottleneck, or processing data is slow (e.g. when you\n", + "have limited CPU threads available, but do have a GPU for the FMF correlation\n", + "backend), and can cope with the extra memory requirements, this can be much faster.\n", + "\n", + "To see examples of the speed-ups and memory consumption, look at the benchmarks\n", + "in the pull-request [here](https://github.com/eqcorrscan/EQcorrscan/pull/544).\n", + "\n", + "To enable concurrent processing, use the `concurrent_processing` argument\n", + "for `.client_detect` or `.detect` methods on `Tribe` objects." + ], + "metadata": { + "collapsed": false + }, + "id": "ff1e851b71fc452d" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "69047ddc15f2f2c9" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/eqcorrscan/doc/tutorials/matched-filter.rst b/eqcorrscan/doc/tutorials/matched-filter.rst deleted file mode 100644 index f187208b0..000000000 --- a/eqcorrscan/doc/tutorials/matched-filter.rst +++ /dev/null @@ -1,391 +0,0 @@ -Matched-filter detection -======================== - -This tutorial will cover using both the match-filter objects, and using the -internal functions within match-filter. The match-filter objects are designed -to simplify meta-data handling allowing for shorter code with fewer mistakes and -therefore more consistent results. - -Match-filter objects --------------------- - -The match-filter module contains five objects: - - - :doc:`Tribe ` - - :doc:`Template ` - - :doc:`Party ` - - :doc:`Family ` - - :doc:`Detection ` - -The :doc:`Tribe ` object is a container for -multiple :doc:`Template ` objects. -:doc:`Templates ` contain the waveforms -of the template alongside the metadata used to generate the template. Both -:doc:`Templates ` and -:doc:`Tribes ` can be written to disk as -tar archives containing the waveform data in miniseed format, event catalogues -associated with the :doc:`Templates ` -(if provided) in quakeml format and meta-data in a csv file. This archives -can be read back in or transferred between machines. - -The :doc:`Detection `, -:doc:`Family ` and -:doc:`Party ` objects are heirachical, -a single :doc:`Detection ` object -describes a single event detection, and contains information regarding how -the detection was made, what time it was made at alongside other useful -information, it does not store the -:doc:`Template ` object used for the -detection, but does store a reference to the name of the -:doc:`Template `. -:doc:`Family ` objects are containers -for multiple :doc:`Detections ` made -using a single :doc:`Template ` -(name chosen to match the literature). These objects do contain the -:doc:`Template ` used for the detections, -and as such can be used to re-create the list of detections is necessary. -:doc:`Party ` objects are containers for -multiple :doc:`Family ` objects. 
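Relating back to the "Note on concurrent processing" in the tutorial notebook above: the sketch below shows how the `concurrent_processing` argument might be passed to `Tribe.client_detect`. It is an illustration only, not part of this changeset - the tribe archive name ("tribe.tgz"), the GEONET client and the time window are hypothetical placeholders, and the threshold, threshold_type and trig_int values simply mirror the examples used elsewhere in this document.

.. code-block:: python

    from obspy import UTCDateTime
    from obspy.clients.fdsn import Client
    from eqcorrscan.core.match_filter import Tribe

    # Hypothetical inputs - substitute your own tribe archive and client.
    tribe = Tribe().read("tribe.tgz")
    client = Client("GEONET")

    # concurrent_processing=True runs the intermediate steps (download,
    # pre-processing, correlation) for successive data chunks concurrently.
    # It is off by default because it increases memory use.
    party = tribe.client_detect(
        client=client,
        starttime=UTCDateTime(2021, 1, 1),
        endtime=UTCDateTime(2021, 1, 3),
        threshold=8.0, threshold_type="MAD", trig_int=6.0,
        concurrent_processing=True)

The same flag can be given to `Tribe.detect` when the continuous data are already in memory; the correlation backend (for example `xcorr_func="fmf"`, which the note above mentions for GPU correlation) is selected independently of this option.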
All -objects in the detection heirachy have read and write methods - we recommend -writing to tar archives (default) for Party and Family objects, as this will -store all metadata used in detection, which should allow for straightforward -reproduction of results. - -Template creation ------------------ - -:doc:`Templates ` have a construct -method which accesses the functions in -:doc:`template_gen `. Template.construct -only has access to methods that work on individual events, and not catalogs; for -that use the Tribe.construct method. For example, we can use the *from_sac* -method to make a Template from a series of SAC files associated with a single -event: - -.. code-block:: python - - >>> import glob - >>> from eqcorrscan.core.match_filter import Template - >>> import os - >>> from eqcorrscan import tests - >>> # Get the path for the test-data so we can test this - >>> TEST_PATH = os.path.dirname(tests.__file__) - >>> sac_files = glob.glob(TEST_PATH + '/test_data/SAC/2014p611252/*') - >>> # sac_files is now a list of all the SAC files for event id:2014p611252 - >>> template = Template().construct( - ... method='from_sac', name='test', lowcut=2.0, highcut=8.0, - ... samp_rate=20.0, filt_order=4, prepick=0.1, swin='all', - ... length=2.0, sac_files=sac_files) - - -Tribe creation --------------- - -As eluded to above, Template.construct only works for individual events, to -make a lot of templates we have to use the Tribe.construct method. The syntax -is similar, but we don't specify names - templates are named according -to their start-time, but you can rename them later if you wish: - -.. code-block:: python - - >>> from eqcorrscan.core.match_filter import Tribe - >>> from obspy.clients.fdsn import Client - - >>> client = Client('NCEDC') - >>> catalog = client.get_events(eventid='72572665', includearrivals=True) - >>> # To speed the example we have a catalog of one event, but you can have - >>> # more, we are also only using the first five picks, again to speed the - >>> # example. - >>> catalog[0].picks = catalog[0].picks[0:5] - >>> tribe = Tribe().construct( - ... method='from_client', catalog=catalog, client_id='NCEDC', lowcut=2.0, - ... highcut=8.0, samp_rate=20.0, filt_order=4, length=6.0, prepick=0.1, - ... swin='all', process_len=3600, all_horiz=True) - -Matched-filter detection using a Tribe --------------------------------------- - -Both Tribe and Template objects have *detect* methods. These methods call -the main match_filter function. They can be given an un-processed stream and -will complete the appropriate processing using the same processing values stored -in the Template objects. Because Tribe objects can contain Templates with a -range of processing values, this work is completed in groups for groups of -Templates with the same processing values. The Tribe object also has a -client_detect method which will download the appropriate data. Both *detect* -and *client_detect* methods return Party objects. - -For example, we can use the Tribe we created above to detect through a day of -data by running the following: - -.. code-block:: python - - >>> from obspy import UTCDateTime - - >>> party, stream = tribe.client_detect( - ... client=client, starttime=UTCDateTime(2016, 1, 2), - ... endtime=UTCDateTime(2016, 1, 3), threshold=8, threshold_type='MAD', - ... 
trig_int=6, plotvar=False, return_stream=True) - -Generating a Party from a Detection csv ---------------------------------------- - -If you are moving from detections written out as a csv file from an older -version of EQcorrscan, but want to use Party objects now, then this section is -for you! - -First, you need to generate a Tribe from the templates you used to make the -detections. Instructions for this are in the -:doc:`Template creation tutorial ` -section. - -Once you have a Tribe, you can generate a Party using the following: - -.. code-block:: python - - >>> detections = read_detections(detection_file) # doctest:+SKIP - >>> party = Party() # doctest:+SKIP - >>> for template in tribe: # doctest:+SKIP - ... template_detections = [d for d in detections - ... if d.template_name == template.name] - ... family = Family(template=template, detections=template_detections) - ... party += family - -Lag-calc using a Party ----------------------- - -Because parties contain Detection and Template information they can be used to -generate re-picked catalogues using lag-calc: - -.. code-block:: python - - >>> stream = stream.merge().sort(['station']) - >>> repicked_catalog = party.lag_calc(stream, pre_processed=False, - ... shift_len=0.2, min_cc=0.4) # doctest:+ELLIPSIS - -By using the above examples you can go from a standard catalog available from -data centers, to a matched-filter detected and cross-correlation repicked -catalog in a handful of lines. - - -Simple example - match-filter.match-filter ------------------------------------------- - -This example does not work out of the box, you will have to have your own templates -and data, and set things up for this. However, in principle matched-filtering -can be as simple as: - -.. code-block:: python - - from eqcorrscan.core.match_filter import match_filter - from eqcorrscan.utils import pre_processing - from obspy import read - - # Read in and process the daylong data - st = read('continuous_data') - # Use the same filtering and sampling parameters as your template! - st = pre_processing.dayproc( - st, lowcut=2, highcut=10, filt_order=4, samp_rate=50, - starttime=st[0].stats.starttime.date) - # Read in the templates - templates = [] - template_names = ['template_1', 'template_2'] - for template_file in template_names: - templates.append(read(template_file)) - detections = match_filter( - template_names=template_names, template_list=templates, st=st, - threshold=8, threshold_type='MAD', trig_int=6, plotvar=False, cores=4) - -This will create a list of detections, which are of class detection. You can -write out the detections to a csv (colon separated) using the detection.write -method, set `append=True` to write all the detections to one file. Beware though, -if this is set and the file already exists, it will just add on to the old file. - -.. code-block:: python - - for detection in detections: - detection.write('my_first_detections.csv', append=True) - - -Data gaps and how to handle them --------------------------------- - -Data containing gaps can prove problematic for normalized cross-correlation. Because -the correlations are normalized by the standard deviation of the data, if the standard -deviation is low, floating-point rounding errors can occur. EQcorrscan tries to -avoid this in two ways: - -1. In the 'eqcorrscan.utils.correlate` (fftw) functions, correlations are not computed - when the variance of the data window is less than 1e-10, or when there are fewer than - `template_len - 1` non-flat data values (e.g. 
at-least one sample that is not - in a gap), or when the mean of the data multiplied by the standard deviation - of the data is less than 1e-10. -2. The :doc:`pre_processing ` functions fill gaps prior to processing, - process the data, then edit the data within the gaps to be zeros. During processing - aliased signal will appear in the gaps, so it is important to remove those - artifacts to ensure that gaps contain zeros (which will be correctly identified - by the :doc:`correlate ` functions. - -As a caveat of point 1: if your data have very low variance, but real data, your data -will be artificially gained by :doc:`pre_processing ` -to ensure stable correlations. - -If you provide data with filled gaps (e.g. you used `st = st.merge(fill_value=0)` to -either: - -* The `detect` method of :doc:`Tribe `, -* The `detect` method of :doc:`Template `, -* :doc:`shortproc `, -* :doc:`dayproc `, - -Then you will end up with the *wrong* result from the correlation or match_filter -functions. You should provide data with gaps maintained, but merged -(e.g. run `st = st.merge()` before passing the data to those functions). - -If you have data that you know contains gaps that have been padded you must remove -the pads and reinstate the gaps. - - - -Memory limitations and what to do about it ------------------------------------------- - -You may (if you are running large numbers of templates, long data durations, or using -a machine with small memory) run in to errors to do with memory consumption. The -most obvious symptom of this is your computer freezing because it has allocated -all of its RAM, or declaring that it cannot allocate memory. Because EQcorrscan -computes correlations in parallel for multiple templates for the same data period, -it will generate a large number of correlation vectors. At start-up, EQcorrscan -will try to assign the memory it needs (although it then requires a little more -later to do the summation across channels), so you might find that it fills your -memory very early - this is just to increase efficiency and ensure that the memory -is available when needed. - -To get around memory limitations you can: - -* Reduce the number of templates you run in parallel at once - for example you can - make groups of a number of templates and run that group in parallel, before running - the next group in parallel. This is not much less efficient, unless you have - a machine with more CPU cores than your group-size. -* Reduce the length of data you are correlating at any one time. The default is - to use day-long files, but there is nothing stopping you using shorter waveform - durations. -* Reduce the number of channels in templates to only those that you need. Note, - EQcorrscan will generate vectors of zeros for templates that are missing a - channel that is present in other templates, again for processing efficiency, - if not memory efficiency. -* Reduce your sampling rate. Obviously this needs to be at-least twice as large - as your upper frequency filter, but much above this is wasted data. - -The three threshold parameters ------------------------------- - -EQcorrscan detects both positively and negatively correlated waveforms. -The match-filter routine has three key threshold parameters: - -* **threshold_type** can either be MAD, abs or av_chan_corr. MAD stands for Median Absolute - Deviation and is the most commonly used detection statistic in matched-filter studies. 
- abs is the absolute cross-channel correlation sum, note that if you have different - numbers of channels in your templates then this threshold metric probably isn't for you. - av_chan_corr sets a threshold in the cross-channel correlation sum based on av_chan_corr x number of channels. -* **threshold** is the value used for the above metric. -* **trig_int** is the minimum interval in seconds for a detection using the same template. - If there are multiple detections within this window for a single template then EQcorrscan - will only give the best one (that exceeds the threshold the most). - -Advanced example - match-filter-match-filter --------------------------------------------- - -In this section we will outline using the templates generated in the first tutorial -to scan for similar earthquakes within a day of data. This small example does not truly exploit the parallel -operations within this package however, so you would be encouraged to think -about where parallel operations occur (*hint, the code can run one template -per CPU*), and why there are --instance and--splits flags in the other -scripts in the github repository (*hint, if you have heaps of memory -and CPUs you can do some brute force day parallelisation!*). - -The main processing flow is outlined in the figure below, note the main speedups -in this process are achieved by running multiple templates at once, however this -increases memory usage. If memory is a problem there are flags (mem_issue) in the -match_filter.py source that can be turned on - the codes will then write temporary -files, which is slower, but can allow for more data crunching at once, your trade-off, -your call. - - -.. image:: processing_flow.png - :width: 600px - :align: center - :alt: processing_flow.png - -.. literalinclude:: ../../tutorials/match_filter.py - - -SLURM example -------------- - -When the authors of EQcorrscan work on large projects, we use grid computers with -the SLURM (Simple Linux Utility for Resource Management) job scheduler installed. -To facilitate ease of setup, what follows is an example of how we run this. - -.. code-block:: bash - - #!/bin/bash - #SBATCH -J MatchTest - #SBATCH -A ########## - #SBATCH --time=12:00:00 - #SBATCH --mem=7G - #SBATCH --nodes=1 - #SBATCH --output=matchout_%a.txt - #SBATCH --error=matcherr_%a.txt - #SBATCH --cpus-per-task=16 - #SBATCH --array=0-49 - - # Load the required modules here. - module load OpenCV/2.4.9-intel-2015a - module load ObsPy/0.10.3rc1-intel-2015a-Python-2.7.9 - module load joblib/0.8.4-intel-2015a-Python-2.7.9 - - # Run your python script using srun - srun python2.7 LFEsearch.py --splits 50 --instance $SLURM_ARRAY_TASK_ID - - -Where we use a script (LFEsearch.py) that accepts splits and instance flags, -this section of the script is as follows: - -.. 
code-block:: python - - Split=False - instance=False - if len(sys.argv) == 2: - flag=str(sys.argv[1]) - if flag == '--debug': - Test=True - Prep=False - elif flag == '--debug-prep': - Test=False - Prep=True - else: - raise ValueError("I don't recognise the argument, I only know --debug and --debug-prep") - elif len(sys.argv) == 5: - # Arguments to allow the code to be run in multiple instances - Split=True - Test=False - Prep=False - args=sys.argv[1:len(sys.argv)] - for i in xrange(len(args)): - if args[i] == '--instance': - instance=int(args[i+1]) - print 'I will run this for instance '+str(instance) - elif args[i] == '--splits': - splits=int(args[i+1]) - print 'I will divide the days into '+str(splits)+' chunks' - - elif not len(sys.argv) == 1: - raise ValueError("I only take one argument, no arguments, or two flags with arguments") - else: - Test=False - Prep=False - Split=False - -The full script is not included in EQcorrscan, but is available on request. - - diff --git a/eqcorrscan/tests/catalog_to_dd_test.py b/eqcorrscan/tests/catalog_to_dd_test.py index bee613895..a43d4fecd 100644 --- a/eqcorrscan/tests/catalog_to_dd_test.py +++ b/eqcorrscan/tests/catalog_to_dd_test.py @@ -148,8 +148,8 @@ def test_process_stream(self): self.assertEqual(len(s_picks), len(sliced_stream["S"])) for stream in sliced_stream.values(): for tr in stream: - self.assertEqual( - tr.stats.endtime - tr.stats.starttime, extract_len) + self.assertEqual(tr.stats.npts, + extract_len * tr.stats.sampling_rate) def test_read_phase(self): """Function to test the phase reading function""" @@ -196,34 +196,36 @@ def test_compute_correlation_times(self): short_cat = self.catalog[0:10] stream_dict = {event.resource_id.id: stream for event, stream in zip(short_cat, self.streams)} - for interpolate in [True, False]: - diff_times, mapper = compute_differential_times( - catalog=short_cat, correlation=True, event_id_mapper=None, - max_sep=8., min_link=0, min_cc=0.0, stream_dict=stream_dict, - extract_len=2.0, pre_pick=0.5, shift_len=shift_len, - interpolate=interpolate, include_master=True) - diff_times_cat, _ = compute_differential_times( - catalog=short_cat, correlation=False, event_id_mapper=mapper, - include_master=True) - self.assertEqual(len(diff_times), len(short_cat)) - for master_id, linked in diff_times.items(): - for link in linked: - cat_link = [pair for pair in diff_times_cat[master_id] - if pair.event_id_2 == link.event_id_2][0] - if link.event_id_2 == link.event_id_1: - # This is the event matched with itself, check that tt1 - # and tt2 are the same. + for weight_by_square in (True, False): + for interpolate in (True, False): + diff_times, mapper = compute_differential_times( + catalog=short_cat, correlation=True, event_id_mapper=None, + max_sep=8., min_link=0, min_cc=0.0, + stream_dict=stream_dict, extract_len=2.0, pre_pick=0.5, + shift_len=shift_len, interpolate=interpolate, + include_master=True, weight_by_square=weight_by_square) + diff_times_cat, _ = compute_differential_times( + catalog=short_cat, correlation=False, + event_id_mapper=mapper, include_master=True) + self.assertEqual(len(diff_times), len(short_cat)) + for master_id, linked in diff_times.items(): + for link in linked: + cat_link = [pair for pair in diff_times_cat[master_id] + if pair.event_id_2 == link.event_id_2][0] + if link.event_id_2 == link.event_id_1: + # This is the event matched with itself, check + # that tt1 and tt2 are the same. 
+ for obs in link.obs: + self.assertTrue(np.allclose( + obs.tt1, obs.tt2, atol=0.000001)) for obs in link.obs: - self.assertTrue( - np.allclose(obs.tt1, obs.tt2, atol=0.000001)) - for obs in link.obs: - cat_obs = [o for o in cat_link.obs - if o.station == obs.station and - o.phase == obs.phase][0] - self.assertEqual(obs.tt1, cat_obs.tt1) - self.assertLessEqual( - abs(obs.tt2 - cat_obs.tt2), shift_len) - self.assertLessEqual(obs.weight, 1.0) + cat_obs = [o for o in cat_link.obs + if o.station == obs.station and + o.phase == obs.phase][0] + self.assertEqual(obs.tt1, cat_obs.tt1) + self.assertLessEqual( + abs(obs.tt2 - cat_obs.tt2), shift_len) + self.assertLessEqual(obs.weight, 1.0) def test_compute_correlations_strange_lengths(self): """ Check that streams with too short data are unused. PR #424 """ @@ -306,6 +308,21 @@ def test_write_correlations_parallel_process(self): self.assertTrue(os.path.isfile("dt.cc")) os.remove('dt.cc') + def test_write_correlations_parallel_shared_memory(self): + # Contents checked elsewhere + shift_len = 2 + short_cat = self.catalog[0:10] + stream_dict = {event.resource_id.id: stream + for event, stream in zip(short_cat, self.streams)} + write_correlations( + catalog=short_cat, event_id_mapper=None, + max_sep=8., min_link=0, min_cc=0.0, stream_dict=stream_dict, + extract_len=2.0, pre_pick=0.5, shift_len=shift_len, + interpolate=False, parallel_process=True, max_workers=2, + use_shared_memory=True) + self.assertTrue(os.path.isfile("dt.cc")) + os.remove('dt.cc') + def test_write_correlations_parallel_trace_correlation(self): # Contents checked elsewhere shift_len = 2 diff --git a/eqcorrscan/tests/catalog_utils_test.py b/eqcorrscan/tests/catalog_utils_test.py index ee204b6d2..671a2fdba 100644 --- a/eqcorrscan/tests/catalog_utils_test.py +++ b/eqcorrscan/tests/catalog_utils_test.py @@ -12,32 +12,42 @@ @pytest.mark.network class CatalogUtilsTests(unittest.TestCase): + @classmethod @pytest.mark.flaky(reruns=2) # Rerun the test in case of network timeout - def test_filter_picks(self): - """ Test various methods of filtering picks in a catalog.""" + def setUpClass(cls): client = Client(str("NCEDC")) t1 = UTCDateTime(2004, 9, 28) t2 = t1 + 86400 - catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=3, - minlatitude=35.7, maxlatitude=36.1, - minlongitude=-120.6, maxlongitude=-120.2, - includearrivals=True) + cls.catalog = client.get_events( + starttime=t1, endtime=t2, minmagnitude=3, + minlatitude=35.7, maxlatitude=36.1, + minlongitude=-120.6, maxlongitude=-120.2, + includearrivals=True) + # Phase hints are not included in the picks, but are in the arrivals + for ev in cls.catalog: + for arr in ev.origins[-1].arrivals: + pick = arr.pick_id.get_referred_object() + pick.phase_hint = arr.phase + + def test_filter_picks(self): + """ Test various methods of filtering picks in a catalog.""" + stations = ['BMS', 'BAP', 'PAG', 'PAN', 'PBI', 'PKY', 'YEG', 'WOF'] channels = ['SHZ', 'SHN', 'SHE', 'SH1', 'SH2'] networks = ['NC'] locations = [''] top_n_picks = 5 - filtered_catalog = filter_picks(catalog=catalog, stations=stations, - channels=channels, networks=networks, - locations=locations, - top_n_picks=top_n_picks) + filtered_catalog = filter_picks( + catalog=self.catalog.copy(), stations=stations, + channels=channels, networks=networks, + locations=locations, top_n_picks=top_n_picks) for event in filtered_catalog: for pick in event.picks: self.assertTrue(pick.waveform_id.station_code in stations) self.assertTrue(pick.waveform_id.channel_code in channels) 
self.assertTrue(pick.waveform_id.network_code in networks) self.assertTrue(pick.waveform_id.location_code in locations) - filtered_catalog = filter_picks(catalog=catalog, + filtered_catalog = filter_picks(catalog=self.catalog.copy(), top_n_picks=top_n_picks) filtered_stations = [] for event in filtered_catalog: @@ -45,6 +55,35 @@ def test_filter_picks(self): filtered_stations.append(pick.waveform_id.station_code) self.assertEqual(len(list(set(filtered_stations))), top_n_picks) + def test_filter_phase_hints(self): + filtered_catalog = filter_picks( + self.catalog.copy(), phase_hints=["P"]) + + phase_hints = set(p.phase_hint for ev in filtered_catalog + for p in ev.picks) + print(phase_hints) + self.assertEqual(phase_hints, set("P")) + + def test_filter_single_pick(self): + filtered_catalog = filter_picks( + self.catalog.copy(), enforce_single_pick="earliest") + + for ev in filtered_catalog: + stations = {p.waveform_id.station_code for p in ev.picks} + for station in stations: + picks = [p for p in ev.picks + if p.waveform_id.station_code == station] + phase_hints = {p.phase_hint for p in picks} + for phase_hint in phase_hints: + matched_picks = [ + p for p in picks if p.phase_hint == phase_hint] + if len(matched_picks) != 1: + print(f"Multiple picks for {station} - {phase_hint}") + for pick in matched_picks: + print(pick) + self.assertEqual(1, len(matched_picks)) + return + if __name__ == '__main__': unittest.main() diff --git a/eqcorrscan/tests/correlate_test.py b/eqcorrscan/tests/correlate_test.py index 94c470e69..f87ba4b1b 100644 --- a/eqcorrscan/tests/correlate_test.py +++ b/eqcorrscan/tests/correlate_test.py @@ -107,14 +107,14 @@ def read_gappy_real_data(): Super fugly""" from obspy.clients.fdsn import Client from obspy import UTCDateTime - from eqcorrscan.utils.pre_processing import shortproc + from eqcorrscan.utils.pre_processing import multi_process client = Client("GEONET") st = client.get_waveforms( network="NZ", station="DUWZ", location="20", channel="BNZ", starttime=UTCDateTime(2016, 12, 31, 23, 58, 56), endtime=UTCDateTime(2017, 1, 1, 0, 58, 56)) - st = shortproc( + st = multi_process( st=st.merge(), lowcut=2, highcut=20, filt_order=4, samp_rate=50) return st @@ -131,7 +131,7 @@ def read_real_multichannel_templates(): def get_real_multichannel_data(): from obspy.clients.fdsn import Client from obspy import UTCDateTime - from eqcorrscan.utils.pre_processing import shortproc + from eqcorrscan.utils.pre_processing import multi_process t1 = UTCDateTime("2016-01-04T12:00:00.000000Z") t2 = t1 + 600 @@ -139,7 +139,7 @@ def get_real_multichannel_data(): ('NZ', 'HOWZ', '*', 'EHZ', t1, t2)] client = Client("GEONET") st = client.get_waveforms_bulk(bulk) - st = shortproc( + st = multi_process( st.merge(), lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, starttime=t1, endtime=t2) return st @@ -776,6 +776,7 @@ def test_gappy_real_multi_channel_xcorr( assert np.allclose(cc_1, cc, atol=self.atol * 100) +@pytest.mark.serial class TestXcorrContextManager: # fake_cache = copy.deepcopy(corr.XCOR_FUNCS) diff --git a/eqcorrscan/tests/find_peaks_test.py b/eqcorrscan/tests/find_peaks_test.py index 87d5d836a..c3a329b79 100644 --- a/eqcorrscan/tests/find_peaks_test.py +++ b/eqcorrscan/tests/find_peaks_test.py @@ -10,7 +10,7 @@ from eqcorrscan.utils.findpeaks import ( find_peaks2_short, coin_trig, multi_find_peaks, find_peaks_compiled, - _multi_find_peaks_c, _find_peaks_c, decluster, decluster_distance_time) + _find_peaks_c, decluster, decluster_distance_time) from eqcorrscan.utils.timer import 
time_func @@ -294,6 +294,20 @@ def spiky(self): arr[spike_loc] *= 1000 return arr, spike_locs, threshold + @pytest.fixture(scope='class') + @pytest.append_name(datasets_1d) + def all_above_threshold(self): + """ array with large spikes """ + arr = np.ones(self.data_len, dtype=float) + spike_locs = np.random.randint(0, self.data_len, size=500) + threshold = 0.5 + for spike_loc in spike_locs: + arr[spike_loc] = 10 * spike_loc + # Deliberately make each peak different height. When all peaks + # are the same height C and Python return different (but both + # valid) peaks). + return arr, spike_locs, threshold + @pytest.fixture(scope='class') @pytest.append_name(datasets_1d) def clustered(self): @@ -396,8 +410,9 @@ def test_multi_find_peaks(self, dataset_2d, request): print("Peak in parallel but not serial: {0}".format( peak)) # Test the first step - parallel_peak_vals, parallel_peak_indices = _multi_find_peaks_c( - arrays=arr, thresholds=threshold, threads=2) + parallel_peak_vals, parallel_peak_indices = multi_find_peaks( + arr=arr, thresh=threshold, cores=2, + internal_func=find_peaks_compiled) parallel_sorted = [] parallel_peaks_sorted = [] parallel_indices_sorted = [] @@ -446,6 +461,12 @@ def test_multi_find_peaks(self, dataset_2d, request): print("Peak {0} in py but not in C".format( serial_py_peaks[i][j])) assert diff_count <= 0.0001 * self.data_len + if self.DEBUG: + np.save("test_2d_array.npy", arr) + np.save("test_c_peaks_serial.npy", serial_c_peaks) + np.save("test_c_peaks_parallel.npy", parallel_c_peaks) + np.save("test_py_peaks_serial.npy", serial_py_peaks) + np.save("test_py_peaks_parallel.npy", parallel_py_peaks) def test_noisy_timings(self, noisy_multi_array): arr = noisy_multi_array.astype(np.float32) diff --git a/eqcorrscan/tests/lag_calc_test.py b/eqcorrscan/tests/lag_calc_test.py index 635e57de0..97f33c7ce 100644 --- a/eqcorrscan/tests/lag_calc_test.py +++ b/eqcorrscan/tests/lag_calc_test.py @@ -19,14 +19,14 @@ from eqcorrscan.core.match_filter import Detection, Family, Party, Template from eqcorrscan.helpers.mock_logger import MockLoggingHandler -np.random.seed(999) - class SyntheticTests(unittest.TestCase): """ Test lag-calc with synthetic data. """ @classmethod def setUpClass(cls): + np.random.seed(999) print("Setting up class") + np.random.seed(999) samp_rate = 50 t_length = .75 # Make some synthetic templates diff --git a/eqcorrscan/tests/matched_filter/__init__.py b/eqcorrscan/tests/matched_filter/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/eqcorrscan/tests/matched_filter/helper_test.py b/eqcorrscan/tests/matched_filter/helper_test.py new file mode 100644 index 000000000..610801064 --- /dev/null +++ b/eqcorrscan/tests/matched_filter/helper_test.py @@ -0,0 +1,448 @@ +""" +Test the helpers for EQcorrscan's matched-filter codes. 
+""" + +import abc +import copy +import os.path +import time +import shutil +import tempfile +import logging +import unittest +import pytest + +from multiprocessing import Process, Queue + +from obspy import UTCDateTime, read +from obspy.clients.fdsn import Client +from obspy.clients.earthworm import Client as EWClient + +from eqcorrscan.core.match_filter import Party +from eqcorrscan.core.match_filter.helpers import ( + get_waveform_client, _test_event_similarity) +from eqcorrscan.core.match_filter.helpers.processes import ( + _get_detection_stream, Poison, _prepper, _pre_processor, + _make_detections) + +try: + from pytest_cov.embed import cleanup_on_sigterm +except ImportError: + pass +else: + cleanup_on_sigterm() + + +Logger = logging.getLogger(__name__) +MAX_WAIT = 15 # Maximum wait time for process to close + + +def get_test_templates(): + party = Party().read( + filename=os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), + 'test_data', 'test_party.tgz')) + return [f.template for f in party] + + +class TestHelperFunctions(unittest.TestCase): + def test_monkey_patching(self): + """ Test that monkey patching a client works. """ + client = EWClient("pubavo1.wr.usgs.gov", 16022) + self.assertFalse(hasattr(client, "get_waveforms_bulk")) + client = get_waveform_client(client) + self.assertTrue(hasattr(client, "get_waveforms_bulk")) + + @pytest.mark.network + def test_event_similarity_quiet(self): + self._event_similarity(verbose=False) + + @pytest.mark.network + def test_event_similarity_loud(self): + self._event_similarity(verbose=True) + + def _event_similarity(self, verbose: bool = False): + client = Client("GEONET") + event = client.get_events(eventid="2023p923930")[0] + self.assertTrue(_test_event_similarity( + event, event, verbose=verbose)) + event2 = event.copy() + self.assertTrue(_test_event_similarity( + event, event2, verbose=verbose)) + with self.assertRaises(NotImplementedError): + _test_event_similarity(event, "bob") + event2.origins = event2.origins[0:-2] + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.origins[-1].arrivals = event2.origins[-1].arrivals[0:-2] + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.origins[-1].arrivals[-1].time_residual = \ + event2.origins[-1].arrivals[-1].time_residual - .5 + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.origins[-1].arrivals[-1].distance = \ + event2.origins[-1].arrivals[-1].distance - 1.5 + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.origins[-1].time -= 60 + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + # Picks + event2 = event.copy() + event2.picks = event2.picks[0:-2] + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.picks[0].time += 20 + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.picks[0].waveform_id.station_code = "BOB" + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.picks[0].waveform_id.channel_code = "BOB" + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + # Amplitudes + event2 = event.copy() + event2.amplitudes = event2.amplitudes[0:-2] + self.assertFalse(_test_event_similarity( + event, event2, 
verbose=verbose)) + event2 = event.copy() + event2.amplitudes[0].generic_amplitude += 100 + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.amplitudes[0].waveform_id.station_code = "BOB" + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + event2 = event.copy() + event2.amplitudes[0].waveform_id.channel_code = "BOB" + self.assertFalse(_test_event_similarity( + event, event2, verbose=verbose)) + + +class ProcessTests(abc.ABC, unittest.TestCase): + directories_to_nuke = [] + wait_time = 5 + kwargs = dict() + process = None + + @classmethod + def tearDownClass(cls): + for _dir in cls.directories_to_nuke: + try: + shutil.rmtree(_dir) + except FileNotFoundError: + pass + + def wait_for_close(self): + total_wait = 0 + while total_wait <= MAX_WAIT: + if self.process.is_alive(): + time.sleep(self.wait_time) + else: + break + self.assertFalse(self.process.is_alive()) + self.process.join() + + +class TestMakeDetections(ProcessTests): + @classmethod + def setUpClass(cls): + templates = get_test_templates() + cls.global_kwargs = dict( + delta=0.01, + templates=templates, + threshold=8.0, + threshold_type="MAD", + save_progress=False, + ) + + def setUp(self): + self.kwargs = copy.copy(self.global_kwargs) + self.kwargs.update(dict( + input_queue=Queue(), + output_queue=Queue(), + poison_queue=Queue() + )) + + def test_poisoning(self): + self.process = Process( + target=_make_detections, kwargs=self.kwargs, + name="TestProcess") + poisoning(obj=self) + + def test_poisoning_from_input(self): + self.process = Process( + target=_make_detections, kwargs=self.kwargs, + name="TestProcess") + poisoning_from_input( + obj=self, input_queue=self.kwargs['input_queue']) + + +class TestPrepper(ProcessTests): + @classmethod + def setUpClass(cls): + templates = get_test_templates() + cls.global_kwargs = dict( + templates=templates, + group_size=5, + groups=None, + xcorr_func="fmf", + ) + + def setUp(self): + self.kwargs = copy.copy(self.global_kwargs) + self.kwargs.update(dict( + input_stream_filename_queue=Queue(), + output_queue=Queue(maxsize=1), + poison_queue=Queue() + )) + + def test_poisoning(self): + self.process = Process( + target=_prepper, kwargs=self.kwargs, + name="TestProcess") + poisoning(obj=self) + + def test_poisoning_while_waiting_on_output(self): + self.process = Process( + target=_prepper, kwargs=self.kwargs, + name="TestProcess") + poisoning_while_waiting_on_output( + obj=self, output_queue=self.kwargs["output_queue"]) + + def test_poisoning_from_input(self): + self.process = Process( + target=_prepper, kwargs=self.kwargs, + name="TestProcess") + poisoning_from_input( + obj=self, input_queue=self.kwargs['input_stream_filename_queue']) + + +class TestPreProcessor(ProcessTests): + @classmethod + def setUpClass(cls): + process_length = 360 + cls.global_kwargs = dict( + template_ids={("NZ", "WVZ", "10", "HHZ")}, + pre_processed=False, + ignore_length=False, + ignore_bad_data=False, + filt_order=4, + highcut=20, + lowcut=2, + samp_rate=50, + process_length=process_length, + parallel=False, + cores=1, + daylong=False, + overlap=3.0, + ) + + def setUp(self): + self.kwargs = copy.copy(self.global_kwargs) + self.kwargs.update(dict( + input_stream_queue=Queue(), + temp_stream_dir=tempfile.mkdtemp(), + output_filename_queue=Queue(maxsize=1), + poison_queue=Queue() + )) + Logger.info(self.kwargs) + self.directories_to_nuke.append(self.kwargs['temp_stream_dir']) + + def test_poisoning(self): + self.process = Process( + 
target=_pre_processor, kwargs=self.kwargs, + name="TestProcess") + poisoning(obj=self) + + def test_poisoning_while_waiting_on_output(self): + self.process = Process( + target=_pre_processor, kwargs=self.kwargs, + name="TestProcess") + poisoning_while_waiting_on_output( + obj=self, output_queue=self.kwargs["output_filename_queue"]) + + def test_poisoning_from_input(self): + self.process = Process( + target=_pre_processor, kwargs=self.kwargs, + name="TestProcess") + poisoning_from_input( + obj=self, input_queue=self.kwargs['input_stream_queue']) + + +class TestGetDetectionStreamProcess(ProcessTests): + @classmethod + def setUpClass(cls): + process_length = 360 + cls.global_kwargs = dict( + template_channel_ids=[("NZ", "WVZ", "10", "HHZ")], + client=Client("GEONET"), + retries=3, + min_gap=0.0, + buff=3, + full_stream_dir=None, + pre_process=False, + parallel_process=True, + process_cores=1, + daylong=False, + overlap=3.0, + ignore_length=False, + ignore_bad_data=False, + filt_order=4, + highcut=20, + lowcut=2, + samp_rate=50, + process_length=process_length, + ) + + def setUp(self): + # Make a copy of the class-wide kwargs + self.kwargs = copy.copy(self.global_kwargs) + self.kwargs.update( + dict(input_time_queue=Queue(), + poison_queue=Queue(), + output_filename_queue=Queue(maxsize=1), + temp_stream_dir=tempfile.mkdtemp())) + Logger.info(self.kwargs) + # Cleanup + self.directories_to_nuke.append(self.kwargs['temp_stream_dir']) + + def test_poisoning(self): + self.process = Process( + target=_get_detection_stream, kwargs=self.kwargs, + name="TestProcess") + poisoning(obj=self) + + def test_poisoning_while_waiting_on_output(self): + self.process = Process( + target=_get_detection_stream, kwargs=self.kwargs, + name="TestProcess") + poisoning_while_waiting_on_output( + obj=self, output_queue=self.kwargs['output_filename_queue']) + + def test_poisoning_from_input(self): + self.process = Process( + target=_get_detection_stream, kwargs=self.kwargs, + name="TestProcess") + poisoning_from_input( + obj=self, input_queue=self.kwargs['input_time_queue']) + + def test_normal_operation(self): + self.process = Process( + target=_get_detection_stream, kwargs=self.kwargs, + name="TestProcess") + self.process.start() + + # Populate time queue + self.kwargs['input_time_queue'].put( + (UTCDateTime(2021, 1, 1), + UTCDateTime(2021, 1, 1, 0, 10))) + self.kwargs['input_time_queue'].put(None) + + # Get the output + filename = self.kwargs['output_filename_queue'].get() + self.assertTrue(os.path.isfile(filename)) + self.assertEqual(self.kwargs['output_filename_queue'].get(), None) + + # Wait for the process to end + self.wait_for_close() + + def test_full_stream_operation(self): + kwargs = copy.copy(self.kwargs) + kwargs.update(dict( + full_stream_dir=tempfile.mkdtemp(), + pre_process=True,)) + self.process = Process( + target=_get_detection_stream, kwargs=kwargs, + name="TestProcess") + self.process.start() + + # Populate time queue + kwargs['input_time_queue'].put( + (UTCDateTime(2021, 1, 1), + UTCDateTime(2021, 1, 1, 0, 10))) + kwargs['input_time_queue'].put(None) + + # Get the output + filename = kwargs['output_filename_queue'].get() + self.assertTrue(os.path.isfile(filename)) + self.assertEqual(kwargs['output_filename_queue'].get(), None) + + # Wait for the process to end + self.wait_for_close() + + # Check for full stream + full_st = read(f"{kwargs['full_stream_dir']}/*") + st = read(filename) + + self.assertEqual(st[0].id, full_st[0].id) + self.assertEqual(st[0].stats.sampling_rate, kwargs['samp_rate']) + 
self.assertEqual(full_st[0].stats.sampling_rate, 100.0) + + # Cleanup + self.directories_to_nuke.append(kwargs['full_stream_dir']) + + def test_exception_handling(self): + kwargs = copy.copy(self.kwargs) + kwargs.update(dict( + overlap="bob", # This need to be a float! Should raise exception + pre_process=True, + )) + self.process = Process( + target=_get_detection_stream, kwargs=kwargs, name="ProcessProcess") + self.process.start() + + # Populate time queue + kwargs['input_time_queue'].put( + (UTCDateTime(2021, 1, 1), + UTCDateTime(2021, 1, 1, 0, 10))) + kwargs['input_time_queue'].put(None) + + time.sleep(self.wait_time) + poison = kwargs['poison_queue'].get() + self.assertIsInstance(poison, Poison) + self.wait_for_close() + + +############################################################################### +# STANDARD PROCESS DEATH TESTS +############################################################################### + + +def poisoning(obj: ProcessTests): + obj.process.start() + # Test death + obj.kwargs['poison_queue'].put(Exception("TestException")) + obj.wait_for_close() + + +def poisoning_while_waiting_on_output(obj: ProcessTests, output_queue: Queue): + # Fill output queue + output_queue.put("This is dog") + obj.process.start() + # Test death + obj.kwargs['poison_queue'].put(Exception("TestException")) + obj.wait_for_close() + + +def poisoning_from_input(obj: ProcessTests, input_queue: Queue): + obj.process.start() + # Test death + input_queue.put(Poison(Exception("TestException"))) + obj.wait_for_close() + + +if __name__ == '__main__': + """ + Run core tests + """ + unittest.main() diff --git a/eqcorrscan/tests/match_filter_test.py b/eqcorrscan/tests/matched_filter/match_filter_test.py similarity index 73% rename from eqcorrscan/tests/match_filter_test.py rename to eqcorrscan/tests/matched_filter/match_filter_test.py index 95a8f8a08..ff121c242 100644 --- a/eqcorrscan/tests/match_filter_test.py +++ b/eqcorrscan/tests/matched_filter/match_filter_test.py @@ -3,14 +3,16 @@ """ import copy import os +import glob import unittest import pytest +import logging + import numpy as np from obspy import read, UTCDateTime, read_events, Catalog, Stream, Trace from obspy.clients.fdsn import Client from obspy.clients.fdsn.header import FDSNException -from obspy.clients.earthworm import Client as EWClient from obspy.core.event import Pick, Event from obspy.core.util.base import NamedTemporaryFile @@ -20,28 +22,26 @@ read_party, read_tribe, _spike_test) from eqcorrscan.core.match_filter.matched_filter import ( match_filter, MatchFilterError) -from eqcorrscan.core.match_filter.helpers import get_waveform_client +from eqcorrscan.core.match_filter.template import ( + quick_group_templates, group_templates_by_seedid) +from eqcorrscan.core.match_filter.helpers.tribe import _group + + from eqcorrscan.utils import pre_processing, catalog_utils from eqcorrscan.utils.correlate import fftw_normxcorr, numpy_normxcorr from eqcorrscan.utils.catalog_utils import filter_picks -class TestHelpers(unittest.TestCase): - def test_monkey_patching(self): - """ Test that monkey patching a client works. """ - client = EWClient("pubavo1.wr.usgs.gov", 16022) - self.assertFalse(hasattr(client, "get_waveforms_bulk")) - client = get_waveform_client(client) - self.assertTrue(hasattr(client, "get_waveforms_bulk")) - # TODO: This should test that the method actually works as expected. +Logger = logging.getLogger(__name__) class TestCoreMethods(unittest.TestCase): """ Tests for internal _template_loop and normxcorr2 functions. 
""" + # changed to overflowerror in 0.5.0 def test_detection_assertion(self): - with self.assertRaises(AssertionError): + with self.assertRaises(OverflowError): Detection( template_name='a', detect_time=UTCDateTime(), threshold=1.2, threshold_input=8.0, threshold_type="MAD", typeofdet="corr", @@ -85,8 +85,9 @@ def test_set_normxcorr2(self): """ Check that correlations output are the same irrespective of version. """ - testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), - 'test_data') + testing_path = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), + 'test_data') template = read(os.path.join(testing_path, 'test_template.ms')) template = template[0].data.astype(np.float32) image = read(os.path.join(testing_path, 'test_image.ms')) @@ -124,11 +125,13 @@ def test_threshold_methods(self): def test_missing_data(self): # Test case where there are non-matching streams in the template - test_match_filter(stream_excess=True) + for conc_proc in [True, False]: + test_match_filter(stream_excess=True, conc_proc=conc_proc) def test_extra_templates(self): # Test case where there are non-matching streams in the data - test_match_filter(template_excess=True) + for conc_proc in [True, False]: + test_match_filter(template_excess=True, conc_proc=conc_proc) def test_onesamp_diff(self): """Tests to check that traces in stream are set to same length.""" @@ -145,9 +148,11 @@ def test_onesamp_diff(self): templates[0][0].stats.station = 'A' templates[0][1].stats.sampling_rate = 40 templates[0][1].stats.station = 'B' - match_filter(template_names=['1'], template_list=templates, st=stream, - threshold=8, threshold_type='MAD', trig_int=1, - plot=False) + for conc_proc in [True, False]: + match_filter(template_names=['1'], template_list=templates, + st=stream, threshold=8, threshold_type='MAD', + trig_int=1, plot=False, + concurrent_processing=conc_proc) def test_half_samp_diff(self): """ @@ -169,9 +174,11 @@ def test_half_samp_diff(self): templates[0][0].stats.station = 'A' templates[0][1].stats.sampling_rate = 40 templates[0][1].stats.station = 'B' - match_filter(template_names=['1'], template_list=templates, st=stream, - threshold=8, threshold_type='MAD', trig_int=1, - plot=False) + for conc_proc in [True, False]: + match_filter(template_names=['1'], template_list=templates, + st=stream, threshold=8, threshold_type='MAD', + trig_int=1, plot=False, + concurrent_processing=conc_proc) @pytest.mark.network @@ -211,7 +218,7 @@ def setUpClass(cls): st.trim(cls.t1 + (4 * 3600), cls.t1 + (5 * 3600)).sort() # This is slow? 
print('Processing continuous data') - cls.st = pre_processing.shortproc( + cls.st = pre_processing.multi_process( st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0, num_cores=1) cls.st.trim(cls.t1 + (4 * 3600), cls.t1 + (5 * 3600)).sort() @@ -225,13 +232,15 @@ def test_duplicate_channels_in_template(self): templates = copy.deepcopy(self.templates) # Do this to test an extra condition in match_filter templates[0].remove(templates[0].select(station='CNGZ')[0]) - detections = match_filter(template_names=self.template_names, - template_list=templates, st=self.st, - threshold=8.0, threshold_type='MAD', - trig_int=6.0, plot=False, plotdir='.', - cores=1) - self.assertEqual(len(detections), 1) - self.assertEqual(detections[0].no_chans, 6) + for conc_proc in [True, False]: + Logger.info(f"Running for conc_proc={conc_proc}") + detections = match_filter( + template_names=self.template_names, template_list=templates, + st=self.st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, plot=False, plotdir='.', cores=1, + concurrent_processing=conc_proc) + self.assertEqual(len(detections), 1) + self.assertEqual(detections[0].no_chans, 6) def test_duplicate_cont_data(self): """ Check that error is raised if duplicate channels are present in @@ -239,52 +248,64 @@ def test_duplicate_cont_data(self): tr = self.st[0].copy() tr.data = np.random.randn(100) st = self.st.copy() + tr - with self.assertRaises(NotImplementedError): - match_filter(template_names=self.template_names, - template_list=self.templates, st=st, threshold=8.0, - threshold_type='MAD', trig_int=6.0, plot=False, - plotdir='.', cores=1) + for conc_proc in [True, False]: + Logger.info(f"Running for conc_proc={conc_proc}") + with self.assertRaises(NotImplementedError): + match_filter(template_names=self.template_names, + template_list=self.templates, st=st, + threshold=8.0, threshold_type='MAD', + trig_int=6.0, plot=False, plotdir='.', cores=1, + concurrent_processing=conc_proc) def test_missing_cont_channel(self): """ Remove one channel from continuous data and check that everything still works. 
""" st = self.st.copy() st.remove(st[-1]) - detections, det_cat = match_filter( - template_names=self.template_names, template_list=self.templates, - st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, - plot=False, plotdir='.', cores=1, output_cat=True) - self.assertEqual(len(detections), 1) - self.assertEqual(detections[0].no_chans, 5) - self.assertEqual(len(detections), len(det_cat)) + for conc_proc in [True, False]: + Logger.info(f"Running for conc_proc={conc_proc}") + detections, det_cat = match_filter( + template_names=self.template_names, + template_list=self.templates, + st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, + plot=False, plotdir='.', cores=1, output_cat=True, + concurrent_processing=conc_proc) + self.assertEqual(len(detections), 1) + self.assertEqual(detections[0].no_chans, 5) + self.assertEqual(len(detections), len(det_cat)) def test_no_matching_data(self): """ No matching data between continuous and templates.""" st = self.st.copy() for tr, staname in zip(st, ['a', 'b', 'c', 'd', 'e']): tr.stats.station = staname - with self.assertRaises(IndexError): - match_filter( - template_names=self.template_names, - template_list=self.templates, st=st, threshold=8.0, - threshold_type='MAD', trig_int=6.0, plot=False, - plotdir='.', cores=1) + for conc_proc in [True, False]: + Logger.info(f"Running for conc_proc={conc_proc}") + with self.assertRaises(IndexError): + match_filter( + template_names=self.template_names, + template_list=self.templates, st=st, threshold=8.0, + threshold_type='MAD', trig_int=6.0, plot=False, + plotdir='.', cores=1, concurrent_processing=conc_proc) @pytest.mark.flaky(reruns=2) def test_geonet_tribe_detect(self): client = Client('GEONET') # Try to force issues with starting samples on wrong day for geonet # data - tribe = self.tribe.copy() - for template in tribe.templates: - template.process_length = 86400 - template.st = Stream(template.st[0]) - # Only run one channel templates - party = self.tribe.copy().client_detect( - client=client, starttime=self.t1, endtime=self.t2, - threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=False, plot=False) - self.assertEqual(len(party), 16) + # TODO: This does nothing + # tribe = self.tribe.copy() + # for template in tribe.templates: + # template.process_length = 86400 + # template.st = Stream(template.st[0]) + # # Only run one channel templates + for conc_proc in [True, False]: + Logger.info(f"Running for conc_proc={conc_proc}") + party = self.tribe.copy().client_detect( + client=client, starttime=self.t1, endtime=self.t2, + threshold=8.0, threshold_type='MAD', trig_int=6.0, + daylong=False, plot=False, concurrent_processing=conc_proc) + self.assertEqual(len(party), 16) class TestGappyData(unittest.TestCase): @@ -311,26 +332,30 @@ def test_gappy_data(self): self.assertEqual(len(gaps), 1) start_gap = gaps[0][4] end_gap = gaps[0][5] - party = self.tribe.client_detect( - client=self.client, starttime=self.starttime, - endtime=self.endtime, threshold=0.6, - threshold_type="absolute", trig_int=2, plot=False, - parallel_process=False, cores=1) - for family in party: - print(family) - for detection in family: - self.assertFalse( - start_gap <= detection.detect_time <= end_gap) - for family in party: - self.assertTrue(len(family) in [5, 1]) + for conc_proc in [True, False]: + party = self.tribe.client_detect( + client=self.client, starttime=self.starttime, + endtime=self.endtime, threshold=0.6, + threshold_type="absolute", trig_int=2, plot=False, + parallel_process=False, cores=1, + 
concurrent_processing=conc_proc) + for family in party: + print(family) + for detection in family: + self.assertFalse( + start_gap <= detection.detect_time <= end_gap) + for family in party: + self.assertTrue(len(family) in [5, 1]) def test_gappy_data_removal(self): - party = self.tribe.client_detect( - client=self.client, starttime=self.starttime, - endtime=self.endtime, threshold=8, - threshold_type="MAD", trig_int=2, plot=False, - parallel_process=False, min_gap=1) - self.assertEqual(len(party), 0) + for conc_proc in [True, False]: + party = self.tribe.client_detect( + client=self.client, starttime=self.starttime, + endtime=self.endtime, threshold=8, + threshold_type="MAD", trig_int=2, plot=False, + parallel_process=False, min_gap=1, + concurrent_processing=conc_proc) + self.assertEqual(len(party), 0) @pytest.mark.network @@ -373,7 +398,7 @@ def setUpClass(cls): st = client.get_waveforms_bulk(bulk_info) st.merge(fill_value='interpolate') cls.unproc_st = st.copy() - cls.st = pre_processing.shortproc( + cls.st = pre_processing.multi_process( st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0, num_cores=1, starttime=st[0].stats.starttime, endtime=st[0].stats.starttime + process_len) @@ -382,14 +407,16 @@ def setUpClass(cls): def test_detection_extraction(self): # Test outputting the streams works - detections, detection_streams = \ - match_filter(template_names=self.template_names, - template_list=self.templates, st=self.st, - threshold=8.0, threshold_type='MAD', - trig_int=6.0, plot=False, plotdir='.', - cores=1, extract_detections=True) - self.assertEqual(len(detections), 4) - self.assertEqual(len(detection_streams), len(detections)) + for conc_proc in [True, False]: + detections, detection_streams = match_filter( + template_names=self.template_names, + template_list=self.templates, st=self.st, + threshold=8.0, threshold_type='MAD', + trig_int=6.0, plot=False, plotdir='.', + cores=1, extract_detections=True, + concurrent_processing=conc_proc) + self.assertEqual(len(detections), 4) + self.assertEqual(len(detection_streams), len(detections)) def test_normxcorr(self): # Test a known issue with early normalisation methods @@ -405,15 +432,17 @@ def test_normxcorr(self): self.assertTrue(np.allclose(ccc, ccc_numpy, atol=0.04)) def test_catalog_extraction(self): - detections, det_cat, detection_streams = \ - match_filter(template_names=self.template_names, - template_list=self.templates, st=self.st, - threshold=8.0, threshold_type='MAD', - trig_int=6.0, plot=False, plotdir='.', - cores=1, extract_detections=True, output_cat=True) - self.assertEqual(len(detections), 4) - self.assertEqual(len(detection_streams), len(detections)) - self.assertEqual(len(detection_streams), len(det_cat)) + for conc_proc in [True, False]: + detections, det_cat, detection_streams = match_filter( + template_names=self.template_names, + template_list=self.templates, st=self.st, + threshold=8.0, threshold_type='MAD', + trig_int=6.0, plot=False, plotdir='.', + cores=1, extract_detections=True, output_cat=True, + concurrent_processing=conc_proc) + self.assertEqual(len(detections), 4) + self.assertEqual(len(detection_streams), len(detections)) + self.assertEqual(len(detection_streams), len(det_cat)) def test_same_detections_individual_and_parallel(self): """ @@ -577,7 +606,7 @@ def test_tribe_copy(self): """Test copy method""" party = Party().read( filename=os.path.join( - os.path.abspath(os.path.dirname(__file__)), + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'test_data', 'test_party.tgz')) tribe = 
Tribe(f.template for f in party.families) copied = tribe.copy() @@ -675,34 +704,30 @@ def setUpClass(cls): method='from_meta_file', catalog=catalog, st=st.copy(), lowcut=0.1, highcut=0.45, samp_rate=1.0, filt_order=4, length=20.0, prepick=0.15, swin='all', process_len=process_len) - st = pre_processing.shortproc( + st = pre_processing.multi_process( st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, num_cores=1, starttime=st[0].stats.starttime, endtime=st[0].stats.starttime + process_len) party = Party().read( filename=os.path.join( - os.path.abspath(os.path.dirname(__file__)), + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'test_data', 'test_party.tgz')) cls.family = party.sort()[0].copy() cls.t1, cls.t2, cls.template_stachans = (t1, t2, template_stachans) cls.unproc_st, cls.tribe, cls.onehztribe, cls.st, cls.party = ( unproc_st, tribe, onehztribe, st, party) - @classmethod - def tearDownClass(cls): - for f in ['eqcorrscan_temporary_party.tgz']: - if os.path.isfile(f): - os.remove(f) - def test_tribe_detect(self): """Test the detect method on Tribe objects""" - party = self.tribe.detect( - stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=False) - self.assertEqual(len(party), 4) - compare_families( - party=party, party_in=self.party, float_tol=0.05, - check_event=True) + for conc_proc in [True, False]: + party = self.tribe.detect( + stream=self.unproc_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + parallel_process=False, concurrent_processing=conc_proc) + self.assertEqual(len(party), 4) + compare_families( + party=party, party_in=self.party, float_tol=0.05, + check_event=True) def test_tribe_detect_with_empty_streams(self): """ @@ -711,175 +736,201 @@ def test_tribe_detect_with_empty_streams(self): continuous data is incomplete. This test should fail in v0.4.2 due to a bug. 
""" - # remove trace for station PHA (PHOB, PSR, PCA, PAG remain) - st = self.unproc_st.copy().remove( - self.unproc_st.copy().select(station='PHA')[0]) - tribe1 = Tribe([t.copy() for t in self.tribe - if (t.name == '2004_09_28t17_19_08' or - t.name == '2004_09_28t17_19_25')]) - # run detection with 2 templates in tribe - party1 = tribe1.detect( - stream=st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plotvar=False, parallel_process=False) - self.assertEqual(len(party1), 2) - party1 = Party([f for f in party1 - if f.template.name == '2004_09_28t17_19_25']) - # run detection with only 1 template in tribe - tribe2 = Tribe([t.copy() for t in self.tribe - if t.name == '2004_09_28t17_19_25']) - party2 = tribe2.detect( - stream=st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plotvar=False, parallel_process=False) - self.assertEqual(len(party2), 1) - # This should fail in v0.4.2 - compare_families( - party=party1, party_in=party2, float_tol=0.05, check_event=False) + for conc_proc in [True, False]: + # remove trace for station PHA (PHOB, PSR, PCA, PAG remain) + st = self.unproc_st.copy().remove( + self.unproc_st.copy().select(station='PHA')[0]) + tribe1 = Tribe([t.copy() for t in self.tribe + if (t.name == '2004_09_28t17_19_08' or + t.name == '2004_09_28t17_19_25')]) + # run detection with 2 templates in tribe + party1 = tribe1.detect( + stream=st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plotvar=False, + parallel_process=False, concurrent_processing=conc_proc) + self.assertEqual(len(party1), 2) + party1 = Party([f for f in party1 + if f.template.name == '2004_09_28t17_19_25']) + # run detection with only 1 template in tribe + tribe2 = Tribe([t.copy() for t in self.tribe + if t.name == '2004_09_28t17_19_25']) + party2 = tribe2.detect( + stream=st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plotvar=False, + parallel_process=False, concurrent_processing=conc_proc) + self.assertEqual(len(party2), 1) + # This should fail in v0.4.2 + compare_families( + party=party1, party_in=party2, float_tol=0.05, + check_event=False) def test_tribe_detect_short_data(self): """Test the detect method on Tribe objects""" - short_st = self.unproc_st.copy() - tribe = self.tribe.copy() - for template in tribe: - template.process_length = 2400 - party = tribe.detect( - stream=short_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=False, - ignore_bad_data=True) - self.assertEqual(len(party), 4) + for conc_proc in [True, False]: + short_st = self.unproc_st.copy() + tribe = self.tribe.copy() + for template in tribe: + template.process_length = 2400 + party = tribe.detect( + stream=short_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + parallel_process=False, concurrent_processing=conc_proc, + ignore_bad_data=True) + self.assertEqual(len(party), 4) @pytest.mark.serial def test_tribe_detect_parallel_process(self): """Test the detect method on Tribe objects""" - party = self.tribe.detect( - stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=True, - process_cores=2) - self.assertEqual(len(party), 4) - compare_families( - party=party, party_in=self.party, float_tol=0.05, - check_event=False) + for conc_proc in [True, False]: + party = self.tribe.detect( + stream=self.unproc_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, 
parallel_process=True, + process_cores=2, concurrent_processing=conc_proc) + self.assertEqual(len(party), 4) + compare_families( + party=party, party_in=self.party, float_tol=0.05, + check_event=False) def test_tribe_detect_save_progress(self): """Test the detect method on Tribe objects""" - party = self.tribe.detect( - stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=False, - save_progress=True) - self.assertEqual(len(party), 4) - self.assertTrue(os.path.isfile("eqcorrscan_temporary_party.tgz")) - saved_party = Party().read("eqcorrscan_temporary_party.tgz") - self.assertEqual(party, saved_party) + for conc_proc in [True, False]: + party = self.tribe.detect( + stream=self.unproc_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + parallel_process=False, save_progress=True, + concurrent_processing=conc_proc) + self.assertEqual(len(party), 4) + # Get all the parties + party_files = glob.glob(".parties/????/???/*.pkl") + saved_party = Party() + for pf in party_files: + saved_party += Party().read(pf) + self.assertEqual(party, saved_party) @pytest.mark.serial def test_tribe_detect_masked_data(self): """Test using masked data - possibly raises error at pre-processing. Padding may also result in error at correlation stage due to poor normalisation.""" - stream = self.unproc_st.copy() - stream[0] = (stream[0].copy().trim( - stream[0].stats.starttime, stream[0].stats.starttime + 1800) + - stream[0].trim( - stream[0].stats.starttime + 1900, stream[0].stats.endtime)) - party = self.tribe.detect( - stream=stream, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=False, - xcorr_func='fftw', concurrency='concurrent') - self.assertEqual(len(party), 4) + for conc_proc in [True, False]: + stream = self.unproc_st.copy() + stream[0] = (stream[0].copy().trim( + stream[0].stats.starttime, + stream[0].stats.starttime + 1800) + stream[0].trim( + stream[0].stats.starttime + 1900, stream[0].stats.endtime)) + party = self.tribe.detect( + stream=stream, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + parallel_process=False, xcorr_func='fftw', + concurrency='concurrent', concurrent_processing=conc_proc) + self.assertEqual(len(party), 4) def test_tribe_detect_no_processing(self): """Test that no processing is done when it isn't necessary.""" - tribe = self.tribe.copy() - for template in tribe: - template.lowcut = None - template.highcut = None - party = tribe.detect( - stream=self.st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=False) - self.assertEqual(len(party), 4) - compare_families( - party=party, party_in=self.party, float_tol=0.05, - check_event=False) + for conc_proc in [True, False]: + tribe = self.tribe.copy() + for template in tribe: + template.lowcut = None + template.highcut = None + party = tribe.detect( + stream=self.st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + parallel_process=False, concurrent_processing=conc_proc) + self.assertEqual(len(party), 4) + compare_families( + party=party, party_in=self.party, float_tol=0.05, + check_event=False) @pytest.mark.flaky(reruns=2) @pytest.mark.network def test_client_detect(self): """Test the client_detect method.""" - client = Client('NCEDC') - party = self.tribe.copy().client_detect( - client=client, starttime=self.t1 + 2.75, endtime=self.t2, - threshold=8.0, threshold_type='MAD', 
trig_int=6.0, - daylong=False, plot=False) - compare_families( - party=party, party_in=self.party, float_tol=0.05, - check_event=False) + for conc_proc in [True, False]: + client = Client('NCEDC') + party = self.tribe.copy().client_detect( + client=client, starttime=self.t1 + 2.75, endtime=self.t2, + threshold=8.0, threshold_type='MAD', trig_int=6.0, + daylong=False, plot=False, concurrent_processing=conc_proc) + compare_families( + party=party, party_in=self.party, float_tol=0.05, + check_event=False) @pytest.mark.flaky(reruns=2) @pytest.mark.network def test_client_detect_save_progress(self): """Test the client_detect method.""" - client = Client('NCEDC') - party = self.tribe.copy().client_detect( - client=client, starttime=self.t1 + 2.75, endtime=self.t2, - threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=False, plot=False, save_progress=True) - self.assertTrue(os.path.isfile("eqcorrscan_temporary_party.tgz")) - saved_party = Party().read("eqcorrscan_temporary_party.tgz") - self.assertEqual(party, saved_party) - os.remove("eqcorrscan_temporary_party.tgz") - compare_families( - party=party, party_in=self.party, float_tol=0.05, - check_event=False) + for conc_proc in [True, False]: + client = Client('NCEDC') + party = self.tribe.copy().client_detect( + client=client, starttime=self.t1 + 2.75, endtime=self.t2, + threshold=8.0, threshold_type='MAD', trig_int=6.0, + daylong=False, plot=False, save_progress=True, + concurrent_processing=conc_proc) + self.assertTrue(os.path.isdir(".parties")) + + # Get all the parties + party_files = glob.glob(".parties/????/???/*.pkl") + saved_party = Party() + for pf in party_files: + saved_party += Party().read(pf) + self.assertEqual(party, saved_party) + compare_families( + party=party, party_in=self.party, float_tol=0.05, + check_event=False) @pytest.mark.network def test_party_lag_calc(self): """Test the lag-calc method on Party objects.""" # Test the chained method - chained_cat = self.tribe.detect( - stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False).lag_calc( - stream=self.unproc_st, pre_processed=False) - catalog = self.party.copy().lag_calc( - stream=self.unproc_st, pre_processed=False) - self.assertEqual(len(catalog), 4) - for ev1, ev2 in zip(catalog, chained_cat): - ev1.picks.sort(key=lambda p: p.time) - ev2.picks.sort(key=lambda p: p.time) - catalog.events.sort(key=lambda e: e.picks[0].time) - chained_cat.events.sort(key=lambda e: e.picks[0].time) - for ev, chained_ev in zip(catalog, chained_cat): - for i in range(len(ev.picks)): - for key in ev.picks[i].keys(): - if key == 'resource_id': - continue - if key == 'comments': - continue - if key == 'waveform_id': - for _k in ['network_code', 'station_code', - 'channel_code']: - self.assertEqual( - sorted(ev.picks, - key=lambda p: p.time)[i][key][_k], - sorted(chained_ev.picks, - key=lambda p: p.time)[i][key][_k]) - continue - self.assertEqual( - sorted(ev.picks, - key=lambda p: p.time)[i][key], - sorted(chained_ev.picks, - key=lambda p: p.time)[i][key]) - pick_corrs = sorted(ev.picks, key=lambda p: p.time) - pick_corrs = [float(p.comments[0].text.split("=")[-1]) - for p in pick_corrs] - chained_ev_pick_corrs = sorted(ev.picks, key=lambda p: p.time) - chained_ev_pick_corrs = [ - float(p.comments[0].text.split("=")[-1]) - for p in chained_ev_pick_corrs] - assert np.allclose( - pick_corrs, chained_ev_pick_corrs, atol=0.001) - assert np.allclose( - float(ev.comments[0].text.split("=")[-1]), - 
float(chained_ev.comments[0].text.split("=")[-1]), - atol=0.001) + for conc_proc in [True, False]: + chained_cat = self.tribe.detect( + stream=self.unproc_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, + concurrent_processing=conc_proc).lag_calc( + stream=self.unproc_st, pre_processed=False) + catalog = self.party.copy().lag_calc( + stream=self.unproc_st, pre_processed=False) + self.assertEqual(len(catalog), 4) + for ev1, ev2 in zip(catalog, chained_cat): + ev1.picks.sort(key=lambda p: p.time) + ev2.picks.sort(key=lambda p: p.time) + catalog.events.sort(key=lambda e: e.picks[0].time) + chained_cat.events.sort(key=lambda e: e.picks[0].time) + for ev, chained_ev in zip(catalog, chained_cat): + for i in range(len(ev.picks)): + for key in ev.picks[i].keys(): + if key == 'resource_id': + continue + if key == 'comments': + continue + if key == 'waveform_id': + for _k in ['network_code', 'station_code', + 'channel_code']: + self.assertEqual( + sorted(ev.picks, + key=lambda p: p.time)[i][key][_k], + sorted(chained_ev.picks, + key=lambda p: p.time)[i][key][_k]) + continue + self.assertEqual( + sorted(ev.picks, + key=lambda p: p.time)[i][key], + sorted(chained_ev.picks, + key=lambda p: p.time)[i][key]) + pick_corrs = sorted(ev.picks, key=lambda p: p.time) + pick_corrs = [float(p.comments[0].text.split("=")[-1]) + for p in pick_corrs] + chained_ev_pick_corrs = sorted( + ev.picks, key=lambda p: p.time) + chained_ev_pick_corrs = [ + float(p.comments[0].text.split("=")[-1]) + for p in chained_ev_pick_corrs] + assert np.allclose( + pick_corrs, chained_ev_pick_corrs, atol=0.001) + assert np.allclose( + float(ev.comments[0].text.split("=")[-1]), + float(chained_ev.comments[0].text.split("=")[-1]), + atol=0.001) def test_party_lag_calc_preprocessed(self): """Test that the lag-calc works on pre-processed data.""" @@ -909,9 +960,9 @@ def test_party_lag_calc_short_data(self): party = self.party.copy() st = self.unproc_st.copy() cut_start = st[0].stats.starttime + ( - 0.5 * party[0].template.process_length) + 0.5 * party[0].template.process_length) cut_end = st[0].stats.starttime + ( - 0.8 * party[0].template.process_length) + 0.8 * party[0].template.process_length) st = st.cutout(cut_start, cut_end) catalog = party.lag_calc(stream=st, pre_processed=False, ignore_length=True, ignore_bad_data=True) @@ -981,15 +1032,19 @@ def test_day_long_methods(self): template.process_length = 86400 # Aftershock sequence, with 1Hz data, lots of good correlations = high # MAD! 
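Aside, not part of the patch: with threshold_type='MAD' the detection threshold is not an absolute value but scales with the spread of the network correlation sum, so a day of well-correlated 1 Hz aftershock data raises the effective threshold, as the comment above notes. A minimal illustrative sketch of a MAD-style threshold (the exact formula EQcorrscan applies internally may differ in detail):

import numpy as np

def mad_threshold(cccsum, multiplier=8.0):
    # Standard median absolute deviation, scaled by the user-supplied threshold
    return multiplier * np.median(np.abs(cccsum - np.median(cccsum)))

rng = np.random.default_rng(0)
quiet_day = rng.normal(scale=0.05, size=86400)  # little coherent energy
busy_day = rng.normal(scale=0.2, size=86400)    # many strong correlations
print(mad_threshold(quiet_day), mad_threshold(busy_day))  # busy day ~4x higher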
- day_party = daylong_tribe.detect( - stream=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=True, plot=False, parallel_process=False) - self.assertEqual(len(day_party), 4) - day_catalog = day_party.lag_calc(stream=st, pre_processed=False, - parallel=False) - self.assertEqual(len(day_catalog), 4) - pre_picked_cat = day_party.get_catalog() - self.assertEqual(len(pre_picked_cat), 4) + Logger.info(f"Downloaded {len(st)} traces - handing off to detect") + for conc_proc in [True, False]: + Logger.info(f"Running conc_proc={conc_proc}") + day_party = daylong_tribe.detect( + stream=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, + daylong=True, plot=False, parallel_process=False, + concurrent_processing=conc_proc) + self.assertEqual(len(day_party), 4) + day_catalog = day_party.lag_calc(stream=st, pre_processed=False, + parallel=False) + self.assertEqual(len(day_catalog), 4) + pre_picked_cat = day_party.get_catalog() + self.assertEqual(len(pre_picked_cat), 4) def test_family_lag_calc(self): """Test the lag-calc method on family.""" @@ -1000,10 +1055,12 @@ def test_family_lag_calc(self): def test_template_detect(self): """Test detect method on Template objects.""" test_template = self.family.template.copy() - party_t = test_template.detect( - stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, overlap=None) - self.assertEqual(len(party_t), 1) + for conc_proc in [True, False]: + party_t = test_template.detect( + stream=self.unproc_st, threshold=8.0, threshold_type='MAD', + trig_int=6.0, daylong=False, plot=False, overlap=None, + concurrent_processing=conc_proc) + self.assertEqual(len(party_t), 1) def test_template_construct_not_implemented(self): """Test template construction.""" @@ -1019,7 +1076,7 @@ class TestMatchObjectLight(unittest.TestCase): def setUpClass(cls): cls.party = Party().read( filename=os.path.join( - os.path.abspath(os.path.dirname(__file__)), + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'test_data', 'test_party.tgz')) cls.tribe = Tribe(templates=[fam.template for fam in cls.party]) cls.family = cls.party.sort()[0].copy() @@ -1041,6 +1098,14 @@ def test_party_plot_grouped_rate(self): plot_grouped=True, rate=True, show=False, return_figure=True) return fig + def test_tribe_detect_duplicated_template_names(self): + tribe = self.tribe.copy() + duplicated_template = tribe[0].copy() + # Remove some traces to make it actually different + duplicated_template.st = duplicated_template.st[0:-2] + with self.assertRaises(NotImplementedError): + tribe += duplicated_template + def test_party_io_list(self): """Test reading and writing party objects.""" if os.path.isfile('test_party_list.tgz'): @@ -1084,9 +1149,18 @@ def test_tribe_internal_methods(self): def test_tribe_add(self): """Test add method""" added = self.tribe.copy() - self.assertEqual(len(added + added[0]), 5) + # Check that we can't add same named templates + with self.assertRaises(NotImplementedError): + _ = added + added[0] # noqa: F841 + with self.assertRaises(NotImplementedError): + added += added[0] + # Check that addition works for differently named templates + different = added.copy() + for template in different: + template.name += "_a" + self.assertEqual(len(added + different[0]), 5) self.assertEqual(len(added), 4) - added += added[-1] + added += different[-1] self.assertEqual(len(added), 5) def test_tribe_remove(self): @@ -1342,6 +1416,22 @@ def test_template_io(self): if os.path.isfile('test_template.tgz'): 
os.remove('test_template.tgz') + def test_template_grouping(self): + # PR #524 + # Test that this works directly on the tribe - it should + tribe_len = len(self.tribe) + groups = quick_group_templates(self.tribe) + self.assertEqual(len(groups), 1) + # Add one copy of a template with a different processing length + t2 = self.tribe[0].copy() + t2.process_length -= 100 + templates = [t2] + templates.extend(self.tribe.templates) + # Quick check that we haven't changed the tribe + self.assertEqual(len(self.tribe), tribe_len) + groups2 = quick_group_templates(templates) + self.assertEqual(len(groups2), 2) + def test_party_io(self): """Test reading and writing party objects.""" if os.path.isfile('test_party_out.tgz'): @@ -1471,6 +1561,87 @@ def test_family_catalogs(self): self.assertEqual(family.catalog, get_catalog(family.detections)) +class TestTemplateGrouping(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + templates = [] + station_names = [ + 'ALPH', 'BETA', 'GAMM', 'KAPP', 'ZETA', 'BOB', 'MAGG', + 'ALF', 'WALR', 'ALBA', 'PENG', 'BANA', 'WIGG', 'SAUS', + 'MALC'] + for i in range(20): + template = Stream() + for j in range(10): + for c in ["EHZ", "EHN", "EHE"]: + tr = Trace( + data=np.random.randn(600), + header=dict(station=station_names[j], + channel=c, network="NZ", location="10", + sampling_rate=100., + starttime=UTCDateTime() + (i * 1000))) + template += tr + templates.append(template) + st = templates[0].copy() + for tr in st: + tr.data = np.random.randn(3600 * 100) + templates = [Template(name=str(i), st=t) + for i, t in enumerate(templates)] + cls.templates = templates + cls.st = st + cls.st_seed_ids = {tr.id for tr in st} + + def test_all_grouped(self): + groups = group_templates_by_seedid( + templates=self.templates, st_seed_ids=self.st_seed_ids, + group_size=100) + self.assertEqual(len(groups), 1) + self.assertEqual(len(groups[0]), len(self.templates)) + + def test_group_size_respected(self): + groups = group_templates_by_seedid( + templates=self.templates, st_seed_ids=self.st_seed_ids, + group_size=10) + self.assertEqual(len(groups), 2) + self.assertEqual(len(groups[0]) + len(groups[1]), len(self.templates)) + + def test_all_different_seeds(self): + edited_templates = [] + rng = np.random.default_rng() + for template in self.templates: + template = template.copy() + choices = rng.choice(len(template.st), 5) + template.st = [tr for i, tr in enumerate(template.st) + if i in choices] + edited_templates.append(template) + groups = group_templates_by_seedid( + templates=edited_templates, st_seed_ids=self.st_seed_ids, + group_size=10) + self.assertEqual(len(groups), 2) + self.assertEqual(len(groups[0]) + len(groups[1]), len(self.templates)) + + def test_unmatched_dropped(self): + edited_templates = [t.copy() for t in self.templates] + for tr in edited_templates[0].st: + tr.stats.channel = "ABC" + + groups = group_templates_by_seedid( + templates=edited_templates, st_seed_ids=self.st_seed_ids, + group_size=10) + self.assertEqual(len(groups), 2) + self.assertEqual(len(groups[0]) + len(groups[1]), + len(self.templates) - 1) + + def test_precomputed_groups(self): + presetgroups = [ + ['0', '2', '4', '6', '8', '10', '12', '14', '16', '18'], + ['1', '3', '5', '7', '9', '11', '13', '15', '17', '19']] + + groups = _group(sids=self.st_seed_ids, templates=self.templates, + group_size=10, groups=presetgroups) + group_names = [[t.name for t in grp] for grp in groups] + self.assertEqual(group_names, presetgroups) + + def compare_families(party, party_in, float_tol=0.001, 
check_event=True): party.sort() party_in.sort() @@ -1540,7 +1711,8 @@ def compare_families(party, party_in, float_tol=0.001, check_event=True): def test_match_filter(plot=False, extract_detections=False, threshold_type='MAD', threshold=10, - template_excess=False, stream_excess=False): + template_excess=False, stream_excess=False, + conc_proc=False): """ Function to test the capabilities of match_filter and just check that \ it is working! Uses synthetic templates and seeded, randomised data. @@ -1554,8 +1726,8 @@ def test_match_filter(plot=False, extract_detections=False, import inspect # Read in the synthetic dataset templates = [] - testing_path = os.path.join(os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))), 'test_data', + testing_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath( + inspect.getfile(inspect.currentframe())))), 'test_data', 'synthetic_data') templates.append(read(os.path.join(testing_path, 'synth_template_0.ms'))) templates.append(read(os.path.join(testing_path, 'synth_template_1.ms'))) @@ -1577,7 +1749,7 @@ def test_match_filter(plot=False, extract_detections=False, for template in templates: for tr in template: tr.data += 1 # Make the synthetic data not be all zeros - pre_processing.shortproc( + pre_processing.multi_process( st=template, lowcut=1.0, highcut=4.0, filt_order=3, samp_rate=10.0, seisan_chan_names=True) template_names = list(string.ascii_lowercase)[0:len(templates)] @@ -1585,7 +1757,7 @@ def test_match_filter(plot=False, extract_detections=False, template_names=template_names, template_list=templates, st=data, threshold=threshold, threshold_type=threshold_type, trig_int=6.0, plot=plot, plotdir='.', cores=1, output_cat=False, - extract_detections=extract_detections) + extract_detections=extract_detections, concurrent_processing=conc_proc) if extract_detections: detection_streams = detections[1] detections = detections[0] diff --git a/eqcorrscan/tests/plotting_test.py b/eqcorrscan/tests/plotting_test.py index 9ebbb055b..be8e34306 100644 --- a/eqcorrscan/tests/plotting_test.py +++ b/eqcorrscan/tests/plotting_test.py @@ -24,12 +24,12 @@ from eqcorrscan.core import template_gen, subspace -class SeimicityPlottingMethods(unittest.TestCase): +class SeismicityPlottingMethods(unittest.TestCase): @classmethod def setUpClass(cls): from obspy.clients.fdsn import Client client = Client("IRIS") - starttime = UTCDateTime("2000-01-01") + starttime = UTCDateTime("2010-01-01") endtime = UTCDateTime("2020-05-16") cls.catalog = client.get_events( starttime=starttime, endtime=endtime, latitude=32.5, diff --git a/eqcorrscan/tests/pre_processing_test.py b/eqcorrscan/tests/pre_processing_test.py index 407ab68fd..7b842d0cf 100644 --- a/eqcorrscan/tests/pre_processing_test.py +++ b/eqcorrscan/tests/pre_processing_test.py @@ -11,7 +11,107 @@ from obspy import read, Trace, UTCDateTime, Stream from eqcorrscan.utils.pre_processing import ( - process, dayproc, shortproc, _check_daylong, _prep_data_for_correlation) + multi_process, _check_daylong, _prep_data_for_correlation, + _multi_detrend, _multi_resample, _multi_filter, +) + + +class TestMultiThreadMethods(unittest.TestCase): + """ Compare internal methods to obspy results. 
""" + @classmethod + def setUpClass(cls): + cls.real_st = read(os.path.join( + os.path.abspath(os.path.dirname(__file__)), 'test_data', + 'day_vols', 'Y2012', 'R086.01', '*')) + random_st = Stream([Trace(np.random.randn(86401)) for _ in range(9)]) + for tr in random_st: + tr.stats.sampling_rate = 100 + cls.random_st = random_st + short_st = read(os.path.join( + os.path.abspath(os.path.dirname(__file__)), 'test_data', + "WAV", "TEST_", "2013-09-11-2208-45.DFDPC_030_00")) + cls.short_st = Stream( + [tr for tr in short_st if tr.stats.sampling_rate == 200.0]) + # At the moment processing info is not retained in EQcorrscan + cls.headers_to_compare = { + "network", "station", "location", "channel", "starttime", + "endtime", "sampling_rate", "delta", "npts", "calib"} + + def test_resample(self): + samp_rates = [v / 10 for v in range(1, 10)] + for st, name in zip([self.real_st, self.random_st, self.short_st], + ["real", "random", "short"]): + print(f"Running for {name}") + for samp_frac in samp_rates: + print(f"Checking for {samp_frac} fractional sampling_rate") + samp_rate = st[0].stats.sampling_rate * samp_frac + acc_resample = _multi_resample(st.copy(), samp_rate) + obspy_resample = st.copy().resample(samp_rate) + # Order should not be changed so we can loop + for acc_tr, obspy_tr in zip(acc_resample, obspy_resample): + for head in self.headers_to_compare: + assert acc_tr.stats[head] == obspy_tr.stats[head] + assert np.allclose(acc_tr.data, obspy_tr.data) + + def test_detrend(self): + for st in [self.real_st, self.random_st, self.short_st]: + acc_detrend = _multi_detrend(st.copy()) + obspy_detrend = st.copy().detrend() + # Order should not be changed so we can loop + for acc_tr, obspy_tr in zip(acc_detrend, obspy_detrend): + for head in self.headers_to_compare: + assert acc_tr.stats[head] == obspy_tr.stats[head] + assert np.allclose(acc_tr.data, obspy_tr.data) + + def test_bandpass(self): + lows = [v / 20 for v in range(1, 9)] + highs = [v / 20 for v in range(2, 10)] + for st in [self.real_st, self.random_st, self.short_st]: + for low, high in zip(lows, highs): + lowcut = st[0].stats.sampling_rate * low + highcut = st[0].stats.sampling_rate * high + acc_filter = _multi_filter( + st.copy(), highcut=highcut, lowcut=lowcut, filt_order=4) + obspy_filter = st.copy().filter( + "bandpass", freqmin=lowcut, freqmax=highcut, corners=4, + zerophase=True) + # Order should not be changed so we can loop + for acc_tr, obspy_tr in zip(acc_filter, obspy_filter): + for head in self.headers_to_compare: + assert acc_tr.stats[head] == obspy_tr.stats[head] + assert np.allclose(acc_tr.data, obspy_tr.data) + + def test_lowpass(self): + highs = [v / 20 for v in range(2, 10)] + for st in [self.real_st, self.random_st, self.short_st]: + for high in highs: + highcut = st[0].stats.sampling_rate * high + acc_filter = _multi_filter( + st.copy(), highcut=highcut, lowcut=None, filt_order=4) + obspy_filter = st.copy().filter( + "lowpass", freq=highcut, corners=4, + zerophase=True) + # Order should not be changed so we can loop + for acc_tr, obspy_tr in zip(acc_filter, obspy_filter): + for head in self.headers_to_compare: + assert acc_tr.stats[head] == obspy_tr.stats[head] + assert np.allclose(acc_tr.data, obspy_tr.data) + + def test_highpass(self): + lows = [v / 20 for v in range(1, 9)] + for st in [self.real_st, self.random_st, self.short_st]: + for low in lows: + lowcut = st[0].stats.sampling_rate * low + acc_filter = _multi_filter( + st.copy(), lowcut=lowcut, highcut=None, filt_order=4) + obspy_filter = st.copy().filter( + 
"highpass", freq=lowcut, corners=4, + zerophase=True) + # Order should not be changed so we can loop + for acc_tr, obspy_tr in zip(acc_filter, obspy_filter): + for head in self.headers_to_compare: + assert acc_tr.stats[head] == obspy_tr.stats[head] + assert np.allclose(acc_tr.data, obspy_tr.data) class TestPreProcessing(unittest.TestCase): @@ -37,17 +137,17 @@ def setUpClass(cls): def test_daylong_checks(self): """Test that the data are day-long.""" - self.assertTrue(_check_daylong(self.st[0])) + self.assertTrue(_check_daylong(self.st[0].data)) not_daylong = self.st[0].copy().trim(self.st[0].stats.starttime, self.st[0].stats.starttime + 3600) not_daylong.data = np.append( not_daylong.data, np.zeros( 3602 * int(self.st[0].stats.sampling_rate))) - self.assertFalse(_check_daylong(not_daylong)) + self.assertFalse(_check_daylong(not_daylong.data)) def test_shortproc(self): """Test the short-proc processing method.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=False, num_cores=False, starttime=None, endtime=None) @@ -59,13 +159,14 @@ def test_shortproc(self): def test_filter_error(self): """Check that we don't allow filtering above the nyquist.""" with self.assertRaises(IOError): - shortproc(self.short_stream.copy(), lowcut=0.1, highcut=0.6, - filt_order=4, samp_rate=1, parallel=False, - num_cores=False, starttime=None, endtime=None) + multi_process( + self.short_stream.copy(), lowcut=0.1, highcut=0.6, + filt_order=4, samp_rate=1, parallel=False, + num_cores=False, starttime=None, endtime=None) def test_shortproc_set_start(self): """Check that shortproc trims properly.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=False, num_cores=False, starttime=self.short_stream[0].stats.starttime + 2, endtime=None) @@ -76,7 +177,7 @@ def test_shortproc_set_start(self): def test_shortproc_set_end(self): """Check that shortproc trims properly.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=False, num_cores=False, starttime=None, endtime=self.short_stream[0].stats.endtime - 2) @@ -87,7 +188,7 @@ def test_shortproc_set_end(self): def test_shortproc_set_start_and_end(self): """Check that shortproc trims properly.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=False, num_cores=False, starttime=self.short_stream[0].stats.starttime + 2, @@ -101,7 +202,7 @@ def test_trace_as_argument(self): """ Check that we can cope with a trace, and that a trace is returned. 
""" - processed = shortproc( + processed = multi_process( self.short_stream.copy()[0], lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=False, num_cores=False, starttime=None, endtime=None) @@ -111,7 +212,7 @@ def test_trace_as_argument(self): def test_parallel(self): """Test the parallel implementation.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=True, num_cores=2, starttime=None, endtime=None) @@ -122,7 +223,7 @@ def test_parallel(self): def test_parallel_core_unset(self): """Test the parallel implementation without num_cores set.""" - processed = shortproc( + processed = multi_process( self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, parallel=True, num_cores=False, starttime=None, endtime=None) @@ -133,10 +234,10 @@ def test_parallel_core_unset(self): def test_dayproc(self): """Test a straight-forward day processing implementation.""" - processed = dayproc( + processed = multi_process( st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=2) + num_cores=2, daylong=True) self.assertEqual(len(processed), self.nchans) for tr in processed: self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) @@ -146,10 +247,10 @@ def test_dayproc_trace(self): """ Test a straight-forward day processing implementation with a Trace. """ - processed = dayproc( + processed = multi_process( st=self.st[0].copy(), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=2) + num_cores=2, daylong=True) self.assertTrue(isinstance(processed, Trace)) self.assertEqual(UTCDateTime(self.day_start), processed.stats.starttime) @@ -158,16 +259,17 @@ def test_dayproc_trace(self): def test_dayproc_nyquist_error(self): """Test a failing day processing.""" with self.assertRaises(IOError): - dayproc(st=self.st.copy(), lowcut=0.1, highcut=0.6, filt_order=3, - samp_rate=1, starttime=self.day_start, - parallel=True, num_cores=2) + multi_process( + st=self.st.copy(), lowcut=0.1, highcut=0.6, filt_order=3, + samp_rate=1, starttime=self.day_start, + parallel=True, num_cores=2, daylong=True) def test_dayproc_serial(self): """Test the serial implementation of dayproc.""" - processed = dayproc(st=self.st.copy(), lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=self.day_start, parallel=False, - num_cores=2) + processed = multi_process( + st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, + samp_rate=1, starttime=self.day_start, parallel=False, + num_cores=2, daylong=True) self.assertEqual(len(processed), self.nchans) for tr in processed: self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) @@ -175,10 +277,10 @@ def test_dayproc_serial(self): def test_dayproc_parallel_cores_unset(self): """Test a straight-forward day processing implementation.""" - processed = dayproc( + processed = multi_process( st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=False) + num_cores=False, daylong=True) self.assertEqual(len(processed), self.nchans) for tr in processed: self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) @@ -186,26 +288,27 @@ def test_dayproc_parallel_cores_unset(self): def test_process(self): """Test a basic process implementation.""" - processed = process(tr=self.st[0].copy(), lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=False, 
clip=False, length=86400, - seisan_chan_names=True, ignore_length=False) + processed = multi_process( + st=self.st[0].copy(), lowcut=0.1, highcut=0.4, + filt_order=3, samp_rate=1, starttime=False, + daylong=True, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) def test_process_datetime(self): """Test a basic process implementation.""" - processed = process(tr=self.st[0].copy(), lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=self.day_start, clip=False, length=86400, - seisan_chan_names=True, ignore_length=False) + processed = multi_process( + st=self.st[0].copy(), lowcut=0.1, highcut=0.4, filt_order=3, + samp_rate=1, starttime=self.day_start, daylong=True, + seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) def test_process_nyquist_fail(self): """Test a nyquist error is raised.""" with self.assertRaises(IOError): - process(tr=self.st[0].copy(), lowcut=0.1, highcut=0.6, - filt_order=3, samp_rate=1, starttime=False, clip=False, - length=86400, seisan_chan_names=True, ignore_length=False) + multi_process( + st=self.st[0].copy(), lowcut=0.1, highcut=0.6, + filt_order=3, samp_rate=1, starttime=False, daylong=True, + seisan_chan_names=True, ignore_length=False) def test_process_bad_data(self): """Check that we won't allow data that are mostly zeros.""" @@ -215,27 +318,28 @@ def test_process_bad_data(self): not_daylong.data, np.zeros( 3602 * int(self.st[0].stats.sampling_rate))) with self.assertRaises(ValueError): - process(tr=not_daylong, lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=False, clip=False, length=86400, - seisan_chan_names=True, ignore_length=False) + multi_process( + st=not_daylong, lowcut=0.1, highcut=0.4, + filt_order=3, samp_rate=1, starttime=False, daylong=True, + seisan_chan_names=True, ignore_length=False) def test_short_data_fail(self): """Check that we don't allow too much missing data.""" with self.assertRaises(NotImplementedError): - process(tr=self.st[0].copy(). - trim(endtime=self.st[0].stats.endtime - 18000), lowcut=0.1, - highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, clip=True, length=86400, - seisan_chan_names=True, ignore_length=False) + multi_process( + st=self.st[0].copy().trim( + endtime=self.st[0].stats.endtime - 18000), + lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, + starttime=self.day_start, daylong=True, + seisan_chan_names=True, ignore_length=False) def test_short_data_pass(self): """Check that we do allow missing data if ignore_length is True.""" - processed = process( - tr=self.st[0].copy().trim(endtime=self. + processed = multi_process( + st=self.st[0].copy().trim(endtime=self. st[0].stats.endtime - 18000), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, clip=True, length=86400, + starttime=self.day_start, daylong=True, seisan_chan_names=True, ignore_length=True) self.assertEqual(processed.stats.npts, 86400) @@ -244,37 +348,37 @@ def test_short_data_empty_return(self): Check that we do not include data that is too short even if ignore_bad_data is True. """ - processed = process( - tr=self.st[0].copy().trim(endtime=self. + processed = multi_process( + st=self.st[0].copy().trim(endtime=self. 
st[0].stats.endtime - 28000), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, clip=True, length=86400, + starttime=self.day_start, daylong=True, seisan_chan_names=True, ignore_bad_data=True) self.assertEqual(processed.stats.npts, 0) def test_highcut_debug(self): """Test a basic process implementation with just a highcut""" - processed = process(tr=self.st[0].copy(), lowcut=None, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=False, clip=False, length=86400, - seisan_chan_names=True, ignore_length=False) + processed = multi_process( + st=self.st[0].copy(), lowcut=None, highcut=0.4, + filt_order=3, samp_rate=1, starttime=False, daylong=True, + seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) def test_lowcut_debug(self): """Test a basic process implementation with just a highcut""" - processed = process(tr=self.st[0].copy(), lowcut=0.1, highcut=None, - filt_order=3, samp_rate=1, - starttime=False, clip=False, length=86400, - seisan_chan_names=True, ignore_length=False) + processed = multi_process( + st=self.st[0].copy(), lowcut=0.1, highcut=None, + filt_order=3, samp_rate=1, starttime=False, daylong=True, + seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) def test_masked_trace(self): """Test that processing a masked array works.""" - tr = self.gappy_trace - processed = process(tr=tr, lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=False, clip=False, length=3600, - seisan_chan_names=True, ignore_length=False) + tr = self.gappy_trace.copy() + processed = multi_process( + st=tr, lowcut=0.1, highcut=0.4, + filt_order=3, samp_rate=1, starttime=False, + seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 3601) self.assertFalse(isinstance(processed.data, np.ma.MaskedArray)) self.assertTrue(np.all( @@ -282,22 +386,20 @@ def test_masked_trace(self): def test_masked_trace_no_fill(self): """Test that processing a masked array without filling gaps works.""" - tr = self.gappy_trace - processed = process(tr=tr, lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, - starttime=False, clip=False, length=3600, - seisan_chan_names=True, ignore_length=False, - fill_gaps=False) + tr = self.gappy_trace.copy() + processed = multi_process( + st=tr, lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, + starttime=False, seisan_chan_names=True, ignore_length=False, + fill_gaps=False) self.assertEqual(processed.stats.npts, 3601) self.assertTrue(isinstance(processed.data, np.ma.MaskedArray)) def test_masked_array_resample(self): """Test that processing and resampling a masked array works.""" - tr = self.gappy_trace - processed = process(tr=tr, lowcut=0.1, highcut=0.2, - filt_order=3, samp_rate=0.5, - starttime=False, clip=False, length=3600, - seisan_chan_names=True, ignore_length=False) + tr = self.gappy_trace.copy() + processed = multi_process( + st=tr, lowcut=0.1, highcut=0.2, filt_order=3, samp_rate=0.5, + starttime=False, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 1800) self.assertTrue(np.all( processed.trim(self.gap_starttime, self.gap_endtime).data) == 0) @@ -309,14 +411,17 @@ def test_gap_overlength(self): tr_before = self.gappy_trace.copy() tr_before.stats.starttime -= 3600 tr = tr_before + self.gappy_trace + tr_after - processed = process( - tr=tr, lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.gappy_trace.stats.starttime, clip=True, length=3600, + processed = multi_process( + 
st=tr, lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, + starttime=self.gappy_trace.stats.starttime, + endtime=self.gappy_trace.stats.starttime + 3600, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 3600) self.assertFalse(isinstance(processed.data, np.ma.MaskedArray)) self.assertTrue(np.all( processed.trim(self.gap_starttime, self.gap_endtime).data) == 0) + # Check that there is actually data there! + self.assertEqual(np.isnan(processed.data).sum(), 0) class TestDataPrep(unittest.TestCase): diff --git a/eqcorrscan/tests/subspace_test.py b/eqcorrscan/tests/subspace_test.py index 301961eff..d4c81b104 100644 --- a/eqcorrscan/tests/subspace_test.py +++ b/eqcorrscan/tests/subspace_test.py @@ -137,9 +137,13 @@ def test_create_multiplexed_unaligned(self): self.assertTrue(np.allclose( identity, np.diag(np.ones(len(identity), dtype=np.float16)))) comparison_detector = subspace.read_detector( - os.path.join(os.path.abspath( - os.path.dirname(__file__)), 'test_data', 'subspace', - 'master_detector_multi_unaligned.h5')) + os.path.join(os.path.abspath( + os.path.dirname(__file__)), 'test_data', 'subspace', + 'master_detector_multi_unaligned.h5')) + # Run to refresh file after SVD changes upstream + # detector.write(os.path.join(os.path.abspath( + # os.path.dirname(__file__)), 'test_data', 'subspace', + # 'master_detector_multi_unaligned.h5')) for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', 'filt_order', 'dimension', 'stachans']: # print(key) @@ -150,9 +154,14 @@ def test_create_multiplexed_unaligned(self): other_list = comparison_detector.__getattribute__(key) self.assertEqual(len(list_item), len(other_list)) for item, other_item in zip(list_item, other_list): + print(f"Checking {key}") if not np.allclose(np.abs(item), np.abs(other_item)): print(item) print(other_item) + print("Differences:") + print(item - other_item) + print(f"Max difference: " + f"{np.max(np.abs(item - other_item))}") self.assertTrue(np.allclose(np.abs(item), np.abs(other_item), atol=0.001)) # Finally check that the __eq__ method works if all the above passes. @@ -175,20 +184,29 @@ def test_create_nonmultiplexed_unaligned(self): os.path.join( os.path.abspath(os.path.dirname(__file__)), 'test_data', 'subspace', 'master_detector_unaligned.h5')) + # Run to refresh file after SVD changes upstream + # detector.write(os.path.join( + # os.path.abspath(os.path.dirname(__file__)), + # 'test_data', 'subspace', 'master_detector_unaligned.h5')) for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', 'filt_order', 'dimension', 'stachans']: # print(key) self.assertEqual(comparison_detector.__getattribute__(key), detector.__getattribute__(key)) - for key in ['data', 'u', 'v', 'sigma']: + for key in ['sigma', 'v', 'u', 'data']: # print(key) list_item = detector.__getattribute__(key) other_list = comparison_detector.__getattribute__(key) self.assertEqual(len(list_item), len(other_list)) for item, other_item in zip(list_item, other_list): if not np.allclose(np.abs(item), np.abs(other_item)): + print(f"{key} is different...") + print(item) + print(other_item) + print("Differences:") + print(item - other_item) + print(f"Max difference: " + f"{np.max(np.abs(item - other_item))}") self.assertTrue(np.allclose(np.abs(item), np.abs(other_item), atol=0.001)) # Finally check that the __eq__ method works if all the above passes.
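Aside on the comparisons in the two hunks above (not part of the patch): the singular vectors stored in a subspace detector are only defined up to a sign, so detectors regenerated with a newer SciPy/LAPACK can legitimately differ from the stored master detectors by a sign flip, which is presumably why the assertions compare np.abs(item) against np.abs(other_item) with a small atol. A minimal, self-contained sketch of that sign ambiguity:

import numpy as np

rng = np.random.default_rng(42)
data = rng.normal(size=(6, 200))
u, s, v = np.linalg.svd(data, full_matrices=False)
# Flipping the sign of one left/right singular-vector pair gives an equally
# valid decomposition of the same data
u_flipped, v_flipped = u.copy(), v.copy()
u_flipped[:, 0] *= -1
v_flipped[0, :] *= -1
assert np.allclose((u_flipped * s) @ v_flipped, (u * s) @ v)  # same reconstruction
assert np.allclose(np.abs(u_flipped), np.abs(u))  # equal only up to sign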
@@ -212,6 +230,10 @@ def test_create_multiplexed_aligned(self): os.path.join(os.path.abspath( os.path.dirname(__file__)), 'test_data', 'subspace', 'master_detector_multi.h5')) + # Run to refresh file after SVD changes upstream + # detector.write(os.path.join(os.path.abspath( + # os.path.dirname(__file__)), 'test_data', 'subspace', + # 'master_detector_multi.h5')) for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', 'filt_order', 'dimension', 'stachans']: # print(key) @@ -248,6 +270,9 @@ def test_create_nonmultiplexed_aligned(self): comparison_detector = subspace.read_detector( os.path.join(os.path.abspath(os.path.dirname(__file__)), 'test_data', 'subspace', 'master_detector.h5')) + # Run to refresh file after SVD changes upstream + # detector.write(os.path.join(os.path.abspath(os.path.dirname(__file__)), + # 'test_data', 'subspace', 'master_detector.h5')) for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', 'filt_order', 'dimension', 'stachans']: # print(key) diff --git a/eqcorrscan/tests/template_gen_test.py b/eqcorrscan/tests/template_gen_test.py index 86aef6e14..fed6a122a 100644 --- a/eqcorrscan/tests/template_gen_test.py +++ b/eqcorrscan/tests/template_gen_test.py @@ -101,8 +101,9 @@ def test_tutorial_template_gen(self): for tr in template: expected_tr = expected_template.select( station=tr.stats.station, channel=tr.stats.channel)[0] - self.assertTrue((expected_tr.data.astype(np.float32) == - tr.data.astype(np.float32)).all()) + self.assertTrue(np.allclose( + expected_tr.data.astype(np.float32), + tr.data.astype(np.float32), rtol=0.0001)) del template os.remove('tutorial_template_' + str(template_no) + '.ms') @@ -458,7 +459,8 @@ def test_swin_P(self): def test_swin_all_and_all_horiz(self): template = _template_gen(self.picks, self.st.copy(), 10, swin='all', - all_horiz=True) + all_horiz=True, + horizontal_chans=['E', 'N', '1', '2', '3']) for pick in self.picks: if pick.phase_hint == 'S': self.assertGreaterEqual( @@ -487,6 +489,57 @@ def test_triggered_data(self): self.assertEqual(len(templates), 0) +class TestEdgeGenObs(unittest.TestCase): + @classmethod + # Extra test case with OBS data including hydrophone channels (HDH) and T-phases + def setUpClass(cls): + import eqcorrscan + cls.testing_path = os.path.dirname(eqcorrscan.__file__) + '/tests' + log = logging.getLogger(template_gen_module.__name__) + cls._log_handler = MockLoggingHandler(level='DEBUG') + log.addHandler(cls._log_handler) + cls.log_messages = cls._log_handler.messages + cls.st = read(os.path.join( + cls.testing_path, 'test_data', 'WAV', 'TEST_', + '2019-08-09-1558-47M.NNSN__038')) + # for tr in cls.st: + # tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1] + # Sfile in New Nordic format + event = read_events(os.path.join( + cls.testing_path, 'test_data', 'REA', 'TEST_', + '09-1558-48R.S201908'))[0] + cat = filter_picks( + Catalog([event]), stations=['KBS', 'OBIN1', 'OBIN2', 'SPA0', + 'NOR', 'DAG', 'HOPEN', 'HSPB']) + cls.picks = cat[0].picks + + def setUp(self): + self._log_handler.reset() + + def test_swin_all_and_all_vert_and_all_horiz(self): + # Test that the hydrophone channel on an OBS is included in the + # creation of the vertical channel (P-arrival) template.
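Aside (illustrative, not part of the patch): the assertions in the test body below rely on obspy's fnmatch-style channel matching, so a pattern like '??[ZH]' picks up both conventional vertical channels (e.g. HHZ) and hydrophone channels (e.g. HDH) in one select call. A small sketch with made-up traces named after the OBIN1 station in the test data:

import numpy as np
from obspy import Stream, Trace

st = Stream([Trace(data=np.zeros(10), header=dict(station="OBIN1", channel=c))
             for c in ("HHZ", "HDH", "HH1")])
print(st.select(channel="??[ZH]"))  # keeps HHZ and HDH, drops HH1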
+ template = _template_gen(self.picks, self.st.copy(), 20, swin='all', + all_horiz=True, all_vert=True, + vertical_chans=['Z', 'H']) + for pick in self.picks: + if pick.phase_hint and pick.phase_hint[0] == 'P': + self.assertGreaterEqual( + len(template.select( + station=pick.waveform_id.station_code, + channel='??[ZH]')), 1) + if pick.phase_hint and pick.phase_hint[0] == 'S': + self.assertGreaterEqual( + len(template.select( + station=pick.waveform_id.station_code, + channel='??[NE12]')), 2) + if pick.phase_hint and pick.phase_hint[0] == 'T': + self.assertGreaterEqual( + len(template.select( + station=pick.waveform_id.station_code, + channel='??[ZH]')), 2) + + class TestDayLong(unittest.TestCase): @classmethod def setUpClass(cls): diff --git a/eqcorrscan/tests/test_data/REA/TEST_/09-1558-48R.S201908 b/eqcorrscan/tests/test_data/REA/TEST_/09-1558-48R.S201908 new file mode 100644 index 000000000..87bf7ea6a --- /dev/null +++ b/eqcorrscan/tests/test_data/REA/TEST_/09-1558-48R.S201908 @@ -0,0 +1,52 @@ + 2019 0809 1558 48.1 RQ 78.038 7.318 29.5 BER 11 .90 4.0wBER 1 + GAP=103 BER 2.53 6.6 17.9 10.0 0.9089E+01 -0.5703E+02 0.1273E+01E + 2019-08-09-1558-47M.NNSN__038 6 + 643 SVALBARD REGION 3 + Calculated depth is uncertain, set to 10km 3 + OLDACT:SPL 22-07-27 09:55 OP:fh STATUS: ID:20190809155848 3 + Action:UPS 22-07-28 16:35 OP:fh STATUS: ID:20190809155848 S 3 + OLDACT:ARX 22-08-01 11:49 OP:fh STATUS: ID:20190809155848 S 3 + Action:SPL 22-08-01 11:49 OP:fh STATUS: ID:20190809155848 S 3 + OLDACT:ARX 22-08-01 11:51 OP:fh STATUS: ID:20190809155848 S 3 + Action:SPL 22-08-01 11:51 OP:fh STATUS: ID:20190809155848 S 3 + Action:SPL 22-08-01 12:37 OP:fh STATUS: ID:20190809155848 S I + OLDACT:ARX 22-08-01 12:37 OP:fh STATUS: ID:20190809155848 S 3 + STAT COM NTLO IPHASE W HHMM SS.SSS PAR1 PAR2 AGA OPE AIN RES W DIS CAZ7 + OBIN3HH2 I 1559 10.890 161 219 + OBIN3HH1 ES 1559 27.290 62.0-0.0510 161 219 + OBIN3HHH ET 1600 25.130 50.0-10.6 0 161 219 + OBIN1HDH ET 1600 48.020 0.0 2.13 0 177 315 + SPA0 HHZ Pn 1559 18.190 62.0 1.2810 209 81 + SPA0 HHZ BAZ-Pn 1559 18.190 259.5 13.9 -11. 209 81 + SPA0 HHE BAZ-Sn 1559 39.650 265.9 14.4 -4. 209 81 + SPA0 HHE Sn 1559 39.650 62.0 1.4510 209 81 + HOPENHHZ EP 1559 48.940 59.0 0.4110 465 103 + HOPENHHE ES 1600 33.720 59.0 0.4910 465 103 + HOPENHHZ IAML 1600 35.510 39.1 0.24 465 103 + NOR BHZ EP 1600 06.470 59.0 0.3410 611 321 + NOR BHE Sn 1601 05.890 62.0 0.8710 611 321 + DAG BHZ EP 1600 11.400 59.0 1.4210 642 270 + DAG BHE ES 1601 10.810 59.0 0.2610 642 270 + DBG BHZ Pn 1600 34.550 62.0 0.0810 833 254 + DBG BHE Sn 1601 52.100 62.0-1.0710 833 254 + ARA0 SHZ Sn 4 1602 46.290 62.0-3.41 0 1095 140 + ARA0 SHZ BAZ-Sn 4 1602 46.290 335.2 19.0 -2. 
1095 140 + KBS HHZ IU10 IAML 1559 23.970 137.0 0.20 142 44 + KBS HHZ IU10 EP 1559 08.097 C BER fh + KBS HH1 IU10 ES 1559 21.879 BER fh + BRBA HHZ NO00 IAML 1559 31.610 124.1 0.24 160 86 + BRBA HHZ NO00 EP 1559 10.715 BER fh + BRBA HHN NO00 ES 1559 26.341 BER fh + HSPB HHZ PL IAML 1559 46.140 42.1 0.34 229 116 + HSPB HHZ PL EP 1559 19.781 BER fh + HSPB HHN PL E 1559 37.345 BER fh + HSPB HHN PL ES 1559 42.057 BER fh + OBIN1HHZ 5F00 IAML 1559 31.450 715.9 0.14 177 315 + OBIN1HHZ 5F00 A 1559 31.450 715.9 0.14 177 315 + OBIN1HHZ 5F00 IAML 1559 31.580 1010.2 0.14 BER fh 177 315 + OBIN1HHZ 5F00 IP 1559 11.951 BER fh + OBIN1HH2 5F00 ES 1559 30.318 BER fh + OBIN2HH2 5F00 ES 1559 01.118 BER fh + OBIN2HDH 5F00 E 1559 19.780 41.8 279 + OBIN2HDH 5F00 IP 1558 55.156 BER fh + diff --git a/eqcorrscan/tests/test_data/WAV/TEST_/2019-08-09-1558-47M.NNSN__038 b/eqcorrscan/tests/test_data/WAV/TEST_/2019-08-09-1558-47M.NNSN__038 new file mode 100644 index 000000000..9294eed1c Binary files /dev/null and b/eqcorrscan/tests/test_data/WAV/TEST_/2019-08-09-1558-47M.NNSN__038 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector.h5 b/eqcorrscan/tests/test_data/subspace/master_detector.h5 index 058316ff7..a595a5881 100644 Binary files a/eqcorrscan/tests/test_data/subspace/master_detector.h5 and b/eqcorrscan/tests/test_data/subspace/master_detector.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 index fb59e4ea6..ac0d5f81a 100644 Binary files a/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 and b/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 index f3064a4db..5dc5e3752 100644 Binary files a/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 and b/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 index ef5c40845..b7667a248 100644 Binary files a/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 and b/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 differ diff --git a/eqcorrscan/tests/tutorials_test.py b/eqcorrscan/tests/tutorials_test.py index 91bda7d20..756c93b56 100644 --- a/eqcorrscan/tests/tutorials_test.py +++ b/eqcorrscan/tests/tutorials_test.py @@ -6,11 +6,7 @@ import os import pytest -from obspy import read - -from eqcorrscan.tutorials.template_creation import mktemplates -from eqcorrscan.tutorials import match_filter, lag_calc, subspace -from eqcorrscan.core.match_filter import read_detections +from eqcorrscan.tutorials import subspace class TestTutorialScripts(unittest.TestCase): @@ -19,95 +15,6 @@ def setUpClass(cls): cls.testing_path = os.path.join( os.path.abspath(os.path.dirname(__file__)), 'test_data') - @pytest.mark.slow - def test_templates_and_match(self): - """Call the template creation then the matched-filter tests.""" - try: - print("Making templates") - # Some output for travis to stop it from stalling - mktemplates(plot=False) - print("Made templates") - for template_no in range(4): - template = read( - 'tutorial_template_' + str(template_no) + '.ms') - expected_template = read( - os.path.join( - self.testing_path, - 'tutorial_template_' + str(template_no) + '.ms')) - # 
self.assertTrue(len(template) > 1) - self.assertEqual(len(template), len(expected_template)) - # Run the matched-filter - print("Running the match-filter") - tutorial_detections = match_filter.run_tutorial( - plot=False, num_cores=1) - print("Match-filter ran") - # It should make 20 detections in total... - fname = os.path.join(self.testing_path, - 'expected_tutorial_detections.txt') - expected_detections = read_detections(fname) - - expected_times = [detection.detect_time for detection - in expected_detections] - for expected_time in expected_times: - expected_time.precision = 3 # Lower the precision slightly - # expected_correlations = [ - # round(detection.detect_val, 4) - # for detection in expected_detections] - for detection in tutorial_detections: - assert (detection.detect_val < detection.no_chans) - detection.detect_time.precision = 3 - self.assertIn( - detection.detect_time, expected_times, - msg='Detection at %s is not in expected detections' - % detection.detect_time) - if len(expected_detections) > len(tutorial_detections): - # This is a fail but we are trying to debug - actual_times = [tutorial_detection.detect_time - for tutorial_detection in tutorial_detections] - for detection in expected_detections: - self.assertIn(detection.detect_time, actual_times, - msg='Expected detection at %s was not made' - % detection.detect_time) - self.assertEqual(len(tutorial_detections), 23) - finally: - for template_no in range(4): - if os.path.isfile('tutorial_template_' + - str(template_no) + '.ms'): - os.remove('tutorial_template_' + str(template_no) + '.ms') - - @pytest.mark.slow - def test_lag_calc(self): - """Test the lag calculation tutorial.""" - shift_len = 0.2 - min_mag = 4 - print("Running lag-calc") - detections, picked_catalog, templates, template_names = \ - lag_calc.run_tutorial(min_magnitude=min_mag, shift_len=shift_len, - num_cores=1) - print("Lag-calc ran") - self.assertEqual(len(picked_catalog), len(detections)) - self.assertEqual(len(detections), 8) - # Debug for travis OSX fails - for detection in detections: - assert detection.detect_val < detection.no_chans - for event, detection in zip(picked_catalog, detections): - template = [t[0] for t in zip(templates, template_names) - if t[1] == detection.template_name][0] - template_stachans = [(tr.stats.station, tr.stats.channel) - for tr in template] - for pick in event.picks: - # First check that there is a template for the pick - stachan = (pick.waveform_id.station_code, - pick.waveform_id.channel_code) - self.assertTrue(stachan in template_stachans) - # Now check that the pick time is within +/- shift_len of - # The template - tr = template.select(station=stachan[0], channel=stachan[1])[0] - delay = tr.stats.starttime - \ - template.sort(['starttime'])[0].stats.starttime - re_picked_delay = pick.time - (detection.detect_time + delay) - self.assertTrue(abs(re_picked_delay) < shift_len) - @pytest.mark.superslow def test_subspace(self): """Test the subspace tutorial.""" diff --git a/eqcorrscan/tutorials/lag_calc.py b/eqcorrscan/tutorials/lag_calc.py deleted file mode 100644 index 2320750cb..000000000 --- a/eqcorrscan/tutorials/lag_calc.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Tutorial to illustrate the lag_calc usage.""" -import logging -from multiprocessing import cpu_count - -from obspy.clients.fdsn import Client -from obspy.clients.fdsn.header import FDSNException -from obspy.core.event import Catalog -from obspy import UTCDateTime, Stream - -from eqcorrscan.core import template_gen, match_filter, lag_calc -from 
eqcorrscan.utils import pre_processing, catalog_utils - -# Set up logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") - - -def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4, min_cc=0.5): - """Functional, tested example script for running the lag-calc tutorial.""" - if num_cores > cpu_count(): - num_cores = cpu_count() - client = Client('NCEDC') - t1 = UTCDateTime(2004, 9, 28) - t2 = t1 + 86400 - print('Downloading catalog') - catalog = client.get_events( - starttime=t1, endtime=t2, minmagnitude=min_magnitude, - minlatitude=35.7, maxlatitude=36.1, minlongitude=-120.6, - maxlongitude=-120.2, includearrivals=True) - # We don't need all the picks, lets take the information from the - # five most used stations - note that this is done to reduce computational - # costs. - catalog = catalog_utils.filter_picks( - catalog, channels=['EHZ'], top_n_picks=5) - # There is a duplicate pick in event 3 in the catalog - this has the effect - # of reducing our detections - check it yourself. - for pick in catalog[3].picks: - if pick.waveform_id.station_code == 'PHOB' and \ - pick.onset == 'emergent': - catalog[3].picks.remove(pick) - print('Generating templates') - templates = template_gen.template_gen( - method="from_client", catalog=catalog, client_id='NCEDC', - lowcut=2.0, highcut=9.0, samp_rate=50.0, filt_order=4, length=3.0, - prepick=0.15, swin='all', process_len=3600) - # In this section we generate a series of chunks of data. - start_time = UTCDateTime(2004, 9, 28, 17) - end_time = UTCDateTime(2004, 9, 28, 20) - process_len = 3600 - chunks = [] - chunk_start = start_time - while chunk_start < end_time: - chunk_end = chunk_start + process_len - if chunk_end > end_time: - chunk_end = end_time - chunks.append((chunk_start, chunk_end)) - chunk_start += process_len - - all_detections = [] - picked_catalog = Catalog() - template_names = [template[0].stats.starttime.strftime("%Y%m%d_%H%M%S") - for template in templates] - for t1, t2 in chunks: - print('Downloading and processing for start-time: %s' % t1) - # Download and process the data - bulk_info = [(tr.stats.network, tr.stats.station, '*', - tr.stats.channel, t1, t2) for tr in templates[0]] - # Just downloading a chunk of data - try: - st = client.get_waveforms_bulk(bulk_info) - except FDSNException: - st = Stream() - for _bulk in bulk_info: - st += client.get_waveforms(*_bulk) - st.merge(fill_value='interpolate') - st = pre_processing.shortproc( - st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0, - num_cores=num_cores) - detections = match_filter.match_filter( - template_names=template_names, template_list=templates, st=st, - threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, - plotdir='.', cores=num_cores) - # Extract unique detections from set. - unique_detections = [] - for master in detections: - keep = True - for slave in detections: - if not master == slave and\ - abs(master.detect_time - slave.detect_time) <= 1.0: - # If the events are within 1s of each other then test which - # was the 'best' match, strongest detection - if not master.detect_val > slave.detect_val: - keep = False - break - if keep: - unique_detections.append(master) - all_detections += unique_detections - - picked_catalog += lag_calc.lag_calc( - detections=unique_detections, detect_data=st, - template_names=template_names, templates=templates, - shift_len=shift_len, min_cc=min_cc, interpolate=False, plot=False) - # Return all of this so that we can use this function for testing. 
- return all_detections, picked_catalog, templates, template_names - - -if __name__ == '__main__': - run_tutorial(min_magnitude=4, num_cores=cpu_count()) diff --git a/eqcorrscan/tutorials/match_filter.py b/eqcorrscan/tutorials/match_filter.py deleted file mode 100644 index 566a98c89..000000000 --- a/eqcorrscan/tutorials/match_filter.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -Simple tutorial to demonstrate some of the basic capabilities of the EQcorrscan -matched-filter detection routine. This builds on the template generation -tutorial and uses those templates. If you haven't run that tutorial script -then you will need to before you can run this script. -""" - -import glob -import logging - -from http.client import IncompleteRead -from multiprocessing import cpu_count -from obspy.clients.fdsn import Client -from obspy import UTCDateTime, Stream, read - -from eqcorrscan.utils import pre_processing -from eqcorrscan.utils import plotting -from eqcorrscan.core import match_filter - -# Set up logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") - - -def run_tutorial(plot=False, process_len=3600, num_cores=cpu_count(), - **kwargs): - """Main function to run the tutorial dataset.""" - # First we want to load our templates - template_names = glob.glob('tutorial_template_*.ms') - - if len(template_names) == 0: - raise IOError('Template files not found, have you run the template ' + - 'creation tutorial?') - - templates = [read(template_name) for template_name in template_names] - - # Work out what stations we have and get the data for them - stations = [] - for template in templates: - for tr in template: - stations.append((tr.stats.station, tr.stats.channel)) - # Get a unique list of stations - stations = list(set(stations)) - - # We will loop through the data chunks at a time, these chunks can be any - # size, in general we have used 1 day as our standard, but this can be - # as short as five minutes (for MAD thresholds) or shorter for other - # threshold metrics. However the chunk size should be the same as your - # template process_len. - - # To ensure a complete catalogue you should overlap your chunks by at least - # the largest moveout in your template set - overlap = 0 - for template in templates: - template_starttime = min(tr.stats.starttime for tr in template) - template_overlap = max([tr.stats.starttime - template_starttime - for tr in template]) - if template_overlap > overlap: - overlap = template_overlap - overlap = 10 * ((overlap // 10) + 1) # Round to nearest whole 10 seconds - - # You should test different parameters!!! - start_time = UTCDateTime(2016, 1, 4) - end_time = UTCDateTime(2016, 1, 5) - chunks = [] - chunk_start = start_time - step_length = process_len - overlap - assert step_length > 0, "Positive step length required - " \ - "try longer process length" - while chunk_start < end_time: - chunk_end = chunk_start + process_len - if chunk_end > end_time: - chunk_end = end_time - chunks.append((chunk_start, chunk_end)) - chunk_start += step_length - - unique_detections = [] - - # Set up a client to access the GeoNet database - client = Client("GEONET") - - # Note that these chunks do not rely on each other, and could be paralleled - # on multiple nodes of a distributed cluster, see the SLURM tutorial for - # an example of this. 
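The matched-filter tutorial above works through the data in chunks of the template process length, overlapped by the largest template moveout rounded up to the next whole 10 s so that no detection is lost at a chunk boundary. A small stand-alone sketch of that chunking arithmetic, using plain seconds rather than UTCDateTime objects (whose addition and subtraction behave the same way), is:

def overlapping_chunks(start_time, end_time, process_len, max_moveout):
    """Return (chunk_start, chunk_end) pairs covering [start_time, end_time]
    with chunks of process_len seconds, overlapping by the template moveout
    rounded up to the next whole 10 s, as in the tutorial."""
    overlap = 10 * ((max_moveout // 10) + 1)
    step_length = process_len - overlap
    assert step_length > 0, "Positive step length required - try a longer process length"
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = min(chunk_start + process_len, end_time)
        chunks.append((chunk_start, chunk_end))
        chunk_start += step_length
    return chunks


# One day of data in hour-long chunks with a 35 s maximum moveout:
print(overlapping_chunks(0.0, 86400.0, 3600.0, 35.0)[:3])
# [(0.0, 3600.0), (3560.0, 7160.0), (7120.0, 10720.0)]

Because the chunks do not depend on each other, each (start, end) pair can be dispatched to a separate worker or cluster node, as the tutorial comment notes.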
- for t1, t2 in chunks: - # Generate the bulk information to query the GeoNet database - bulk_info = [] - for station in stations: - bulk_info.append(('NZ', station[0], '*', - station[1][0] + 'H' + station[1][-1], t1, t2)) - - # Note this will take a little while. - print('Downloading seismic data, this may take a while') - st = Stream() - for _bulk in bulk_info: - try: - st += client.get_waveforms(*_bulk) - except IncompleteRead: - print(f"Could not download {_bulk}") - # Merge the stream, it will be downloaded in chunks - st.merge() - - # Pre-process the data to set frequency band and sampling rate - # Note that this is, and MUST BE the same as the parameters used for - # the template creation. - print('Processing the seismic data') - st = pre_processing.shortproc( - st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, - num_cores=num_cores, starttime=t1, endtime=t2) - # Convert from list to stream - st = Stream(st) - - # Now we can conduct the matched-filter detection - detections = match_filter.match_filter( - template_names=template_names, template_list=templates, - st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, - plot=plot, plotdir='.', cores=num_cores, - plot_format='png', **kwargs) - - # Now lets try and work out how many unique events we have just to - # compare with the GeoNet catalog of 20 events on this day in this - # sequence - for parent in detections: - keep = True - for child in detections: - if not parent == child and abs(parent.detect_time - - child.detect_time) <= 1.0: - # If the events are within 1s of each other then test which - # was the 'best' match, strongest detection - if not parent.detect_val > child.detect_val: - keep = False - print('Removed detection at %s with cccsum %s' - % (parent.detect_time, parent.detect_val)) - print('Keeping detection at %s with cccsum %s' - % (child.detect_time, child.detect_val)) - break - if keep: - unique_detections.append(parent) - print('Detection at :' + str(parent.detect_time) + - ' for template ' + parent.template_name + - ' with a cross-correlation sum of: ' + - str(parent.detect_val)) - # We can plot these too - if plot: - stplot = st.copy() - template = templates[template_names.index( - parent.template_name)] - lags = sorted([tr.stats.starttime for tr in template]) - maxlag = lags[-1] - lags[0] - stplot.trim(starttime=parent.detect_time - 10, - endtime=parent.detect_time + maxlag + 10) - plotting.detection_multiplot( - stplot, template, [parent.detect_time.datetime]) - print('We made a total of ' + str(len(unique_detections)) + ' detections') - return unique_detections - - -if __name__ == '__main__': - run_tutorial() diff --git a/eqcorrscan/utils/catalog_to_dd.py b/eqcorrscan/utils/catalog_to_dd.py index 263a9d2ab..abd5b39e0 100644 --- a/eqcorrscan/utils/catalog_to_dd.py +++ b/eqcorrscan/utils/catalog_to_dd.py @@ -11,8 +11,7 @@ import numpy as np import logging from collections import namedtuple, defaultdict, Counter -from obspy.core import stream -from multiprocessing import cpu_count, Pool +from multiprocessing import cpu_count, Pool, shared_memory from obspy import UTCDateTime, Stream from obspy.core.event import ( @@ -208,8 +207,20 @@ def _prepare_stream(stream, event, extract_len, pre_pick, seed_pick_ids=None): seed_id=seed_pick_id.seed_id)) continue tr = tr[0] - if tr.stats.endtime - tr.stats.starttime != extract_len: - Logger.warning(f"Insufficient data for {tr.id}, discarding") + + # If there is one sample too many after this remove the first one + # by convention + n_samples_intended = 
extract_len * tr.stats.sampling_rate + if len(tr.data) == n_samples_intended + 1: + tr.data = tr.data[1:len(tr.data)] + # if tr.stats.endtime - tr.stats.starttime != extract_len: + if tr.stats.npts < n_samples_intended: + Logger.warning( + "Insufficient data ({rlen} s) for {tr_id}, discarding. Check " + "that your traces are at least of length {length} s, with a " + "pre_pick time of at least {prepick} s!".format( + rlen=tr.stats.endtime - tr.stats.starttime, + tr_id=tr.id, length=extract_len, prepick=pre_pick)) continue stream_sliced.update( {seed_pick_id.phase_hint: @@ -221,12 +232,26 @@ def _prepare_stream(stream, event, extract_len, pre_pick, seed_pick_ids=None): def _compute_dt_correlations(catalog, master, min_link, event_id_mapper, stream_dict, min_cc, extract_len, pre_pick, - shift_len, interpolate, max_workers=1, **kwargs): + shift_len, interpolate, max_workers=1, + shm_data_shape=None, shm_dtype=None, + weight_by_square=True, **kwargs): """ Compute cross-correlation delay times. """ max_workers = max_workers or 1 Logger.info( f"Correlating {str(master.resource_id)} with {len(catalog)} events") differential_times_dict = dict() + # Assign trace data from shared memory + for (key, stream) in stream_dict.items(): + for tr in stream: + if len(tr.data) == 0 and hasattr(tr, 'shared_memory_name'): + shm = shared_memory.SharedMemory(name=tr.shared_memory_name) + # Reconstructing numpy data array + sm_data = np.ndarray( + shm_data_shape, dtype=shm_dtype, buffer=shm.buf) + tr.data = np.zeros_like(sm_data) + # Copy data into process memory + tr.data[:] = sm_data[:] + master_stream = _prepare_stream( stream=stream_dict[str(master.resource_id)], event=master, extract_len=extract_len, pre_pick=pre_pick) @@ -379,11 +404,14 @@ def _compute_dt_correlations(catalog, master, min_link, event_id_mapper, event_id_1=event_id_mapper[ str(master.resource_id)], event_id_2=event_id_mapper[used_event_id]) + weight = cc_max + if weight_by_square: + weight **= 2 diff_time.obs.append( _DTObs(station=chan.channel[0], tt1=master_tts["{0}_{1}".format( chan.channel[0], phase_hint)], - tt2=tt2, weight=cc_max ** 2, + tt2=tt2, weight=weight, phase=phase_hint[0])) differential_times_dict.update({used_event_id: diff_time}) # Threshold on min_link @@ -465,12 +493,67 @@ def _prep_horiz_picks(catalog, stream_dict, event_id_mapper): return catalog +def stream_dict_to_shared_mem(stream_dict): + """ + Move the data of streams from a dict of (key, obspy.stream) into shared + memory so that the data can be retrieved by multiple processes in parallel. + This can help speed up parallel execution because the initiation of each + worker process becomes cheaper (less data to transfer). For now this only + puts the numpy array in trace.data into shared memory (because it's easy). + + :type stream_dict: dict of (key, `obspy.stream`) + :param stream_dict: dict of streams that should be moved to shared memory + + :returns: stream_dict, shm_name_list, shm_data_shapes, shm_data_dtypes + + :rtype: dict + :return: Dictionary streams that were moved to shared memory + :rtype: list + :return: List of names to the shared memory address for each trace. + :rtype: list + :return: + List of numpy-array shaped for each trace-data array in shared memory. + :rtype: list + :return: List of data types for each trace-data-array in shared memory. 
+ + """ + shm_name_list = [] + shm_data_shapes = [] + shm_data_dtypes = [] + shm_references = [] + for (key, stream) in stream_dict.items(): + for tr in stream: + data_array = tr.data + # Let SharedMemory create suitable filename itself: + shm = shared_memory.SharedMemory( + create=True, size=data_array.nbytes) + shm_name_list.append(shm.name) + shm_references.append(shm) + # Now create a NumPy array backed by shared memory + shm_data_shape = data_array.shape + shm_data_dtype = data_array.dtype + shared_data_array = np.ndarray( + shm_data_shape, dtype=shm_data_dtype, buffer=shm.buf) + # Copy the original data into shared memory + shared_data_array[:] = data_array[:] + # tr.data = shared_data_array + tr.data = np.array([]) + tr.shared_memory_name = shm.name + shm_data_shapes.append(shm_data_shape) + shm_data_dtypes.append(shm_data_dtype) + shm_data_shapes = list(set(shm_data_shapes)) + shm_data_dtypes = list(set(shm_data_dtypes)) + return (stream_dict, shm_name_list, shm_references, shm_data_shapes, + shm_data_dtypes) + + def compute_differential_times(catalog, correlation, stream_dict=None, event_id_mapper=None, max_sep=8., min_link=8, min_cc=None, extract_len=None, pre_pick=None, shift_len=None, interpolate=False, all_horiz=False, max_workers=None, - max_trace_workers=1, *args, **kwargs): + max_trace_workers=1, use_shared_memory=False, + weight_by_square=True, *args, **kwargs): """ Generate groups of differential times for a catalog. @@ -517,6 +600,15 @@ def compute_differential_times(catalog, correlation, stream_dict=None, Maximum number of workers for parallel correlation of traces insted of events. If None then all threads will be used (but can only be used when max_workers = 1). + :type use_shared_memory: bool + :param use_shared_memory: + Whether to move trace data arrays into shared memory for computing + trace correlations. Can speed up total execution time by ~20 % for + hypodd-correlations with a lot of clustered seismicity. + :type weight_by_square: bool + :param weight_by_square: + Whether to compute correlation weights as the square of the maximum + correlation (True), or the maximum correlation (False). :rtype: dict :return: Dictionary of differential times keyed by event id. @@ -534,12 +626,16 @@ def compute_differential_times(catalog, correlation, stream_dict=None, multiple events and may require more memory, but the latter can be quicker for few events with many or very long traces and requires less memory. + + .. note:: + Differential times are computed as travel-time for event 1 minus + travel-time for event 2 (tt1 - tt2). 
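The shared-memory handling added in this hunk follows the standard multiprocessing.shared_memory recipe: the parent copies each trace's data array into a named SharedMemory block and hands workers only the block name plus a common shape and dtype; each worker re-attaches by name, wraps the buffer in an np.ndarray and copies it into private memory; and the parent closes and unlinks every block once the pool has finished. A self-contained sketch of that round trip for a single array (plain numpy, not EQcorrscan API) is:

import numpy as np
from multiprocessing import shared_memory

data = np.random.randn(1000).astype(np.float32)

# "Parent" side: copy the array into a named shared-memory block.
shm = shared_memory.SharedMemory(create=True, size=data.nbytes)
shared_view = np.ndarray(data.shape, dtype=data.dtype, buffer=shm.buf)
shared_view[:] = data[:]
name, shape, dtype = shm.name, data.shape, data.dtype  # all a worker needs

# "Worker" side: attach by name, rebuild the view, copy into private memory.
worker_shm = shared_memory.SharedMemory(name=name)
view = np.ndarray(shape, dtype=dtype, buffer=worker_shm.buf)
private_copy = view.copy()
del view                 # drop the exported buffer before closing
worker_shm.close()

# Parent side again: release and destroy the block once all workers are done.
del shared_view
shm.close()
shm.unlink()

assert np.allclose(private_copy, data)

The speed-up noted in the docstring (around 20 % for clustered seismicity) comes from not pickling the large arrays into every worker; only the small name/shape/dtype triple crosses the process boundary.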
""" include_master = kwargs.get("include_master", False) correlation_kwargs = dict( min_cc=min_cc, stream_dict=stream_dict, extract_len=extract_len, pre_pick=pre_pick, shift_len=shift_len, interpolate=interpolate, - max_workers=max_workers) + max_workers=max_workers, weight_by_square=weight_by_square) for key, value in kwargs.items(): correlation_kwargs.update({key: value}) if correlation: @@ -587,6 +683,19 @@ def compute_differential_times(catalog, correlation, stream_dict=None, sub_catalogs = ([ev for i, ev in enumerate(sparse_catalog) if master_filter[i]] for master_filter in distance_filter) + # Move trace data into shared memory + if use_shared_memory: + (shm_stream_dict, shm_name_list, shm_references, + shm_data_shapes, shm_dtypes) = ( + stream_dict_to_shared_mem(stream_dict)) + if len(shm_data_shapes) == 1 and len(shm_dtypes) == 1: + shm_data_shape = shm_data_shapes[0] + shm_dtype = shm_dtypes[0] + additional_args.update({'stream_dict': shm_stream_dict}) + additional_args.update({'shm_data_shape': shm_data_shape}) + additional_args.update({'shm_dtype': shm_dtype}) + else: + use_shared_memory = False with pool_boy(Pool, n, cores=max_workers) as pool: # Parallelize over events instead of traces additional_args.update(dict(max_workers=1)) @@ -598,11 +707,19 @@ def compute_differential_times(catalog, correlation, stream_dict=None, sparse_catalog) if str(master.resource_id) in additional_args[ "stream_dict"].keys()] + Logger.info('Submitted asynchronous jobs to workers.') differential_times = { master.resource_id: result.get() for master, result in zip(sparse_catalog, results) if str(master.resource_id) in additional_args[ "stream_dict"].keys()} + Logger.debug('Got results from workers.') + # Destroy shared memory + if use_shared_memory: + for shm_name in shm_name_list: + shm = shared_memory.SharedMemory(name=shm_name) + shm.close() + shm.unlink() else: sub_catalogs = ([ev for i, ev in enumerate(sparse_catalog) if master_filter[i]] @@ -659,6 +776,10 @@ def write_catalog(catalog, event_id_mapper=None, max_sep=8, min_link=8, threads will be used. :returns: event_id_mapper + + .. note:: + Differential times are computed as travel-time for event 1 minus + travel-time for event 2 (tt1 - tt2). """ differential_times, event_id_mapper = compute_differential_times( catalog=catalog, correlation=False, event_id_mapper=event_id_mapper, @@ -697,9 +818,10 @@ def _filter_stream(event_id, st, lowcut, highcut): def write_correlations(catalog, stream_dict, extract_len, pre_pick, shift_len, event_id_mapper=None, lowcut=1.0, - highcut=10.0, max_sep=8, min_link=8, min_cc=0.0, + highcut=10.0, max_sep=8, min_link=8, min_cc=0.0, interpolate=False, all_horiz=False, max_workers=None, - parallel_process=False, *args, **kwargs): + parallel_process=False, weight_by_square=True, + *args, **kwargs): """ Write a dt.cc file for hypoDD input for a given list of events. @@ -746,6 +868,10 @@ def write_correlations(catalog, stream_dict, extract_len, pre_pick, :param parallel_process: Whether to process streams in parallel or not. Experimental, may use too much memory. + :type weight_by_square: bool + :param weight_by_square: + Whether to compute correlation weights as the square of the maximum + correlation (True), or the maximum correlation (False). :rtype: dict :returns: event_id_mapper @@ -754,6 +880,10 @@ def write_correlations(catalog, stream_dict, extract_len, pre_pick, You can provide processed waveforms, or let this function filter your data for you. 
Filtering is undertaken by detrending and bandpassing with a 8th order zerophase butterworth filter. + + .. note:: + Differential times are computed as travel-time for event 1 minus + travel-time for event 2 (tt1 - tt2). """ # Depreciated argument cc_thresh = kwargs.get("cc_thresh", None) @@ -787,7 +917,8 @@ def write_correlations(catalog, stream_dict, extract_len, pre_pick, max_sep=max_sep, min_link=min_link, max_workers=max_workers, stream_dict=processed_stream_dict, min_cc=min_cc, extract_len=extract_len, pre_pick=pre_pick, shift_len=shift_len, - interpolate=interpolate, all_horiz=all_horiz, **kwargs) + interpolate=interpolate, all_horiz=all_horiz, + weight_by_square=weight_by_square, **kwargs) with open("dt.cc", "w") as f: for master_id, linked_events in correlation_times.items(): for linked_event in linked_events: @@ -802,7 +933,7 @@ def _hypodd_phase_pick_str(pick, sparse_event): """ Make a hypodd phase.dat style pick string. """ pick_str = "{station:5s} {tt:7.4f} {weight:5.3f} {phase:1s}".format( station=pick.waveform_id.station_code, - tt=pick.tt, weight=pick.weight, phase_hint=pick.phase_hint[0].upper()) + tt=pick.tt, weight=pick.weight, phase=pick.phase_hint[0].upper()) return pick_str diff --git a/eqcorrscan/utils/catalog_utils.py b/eqcorrscan/utils/catalog_utils.py index 00cda0e6e..f8577dc1e 100644 --- a/eqcorrscan/utils/catalog_utils.py +++ b/eqcorrscan/utils/catalog_utils.py @@ -24,7 +24,8 @@ def filter_picks(catalog, stations=None, channels=None, networks=None, - locations=None, top_n_picks=None, evaluation_mode='all'): + locations=None, top_n_picks=None, evaluation_mode='all', + phase_hints=None, enforce_single_pick=False): """ Filter events in the catalog based on a number of parameters. @@ -43,6 +44,12 @@ def filter_picks(catalog, stations=None, channels=None, networks=None, :param evaluation_mode: To select only manual or automatic picks, or use all (default). :type evaluation_mode: str + :param phase_hints: List of retained phase hints, or None to use all + :type phase_hints: list + :param enforce_single_pick: + Method to enforce using only one pick of each phase-hint per + station or False to leave all. 
Can be {False, "earliest"} + :type enforce_single_pick: str :return: @@ -88,6 +95,8 @@ def filter_picks(catalog, stations=None, channels=None, networks=None, >>> print(sorted(list(set(stations)))) ['BAP', 'BMS', 'PAG', 'PAN', 'PBI', 'PKY', 'WOF', 'YEG'] """ + assert enforce_single_pick in {False, "earliest"}, \ + f"enforce_single_pick={enforce_single_pick} unknown" # Don't work in place on the catalog filtered_catalog = catalog.copy() @@ -115,6 +124,13 @@ def filter_picks(catalog, stations=None, channels=None, networks=None, continue event.picks = [pick for pick in event.picks if pick.waveform_id.location_code in locations] + if phase_hints: + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if pick.phase_hint in phase_hints] + if evaluation_mode == 'manual': for event in filtered_catalog: event.picks = [pick for pick in event.picks @@ -159,6 +175,24 @@ def filter_picks(catalog, stations=None, channels=None, networks=None, if len(event.picks) > 0: tmp_catalog.append(event) + # Finally remove extra picks + if enforce_single_pick: + reverse = False + # TODO: Allow other options + for ev in tmp_catalog: + retained_picks = [] + stations = {p.waveform_id.station_code for p in ev.picks} + for station in stations: + phase_hints = {p.phase_hint for p in ev.picks + if p.waveform_id.station_code == station} + for phase_hint in phase_hints: + picks = [p for p in ev.picks + if p.waveform_id.station_code == station + and p.phase_hint == phase_hint] + picks.sort(key=lambda p: p.time, reverse=reverse) + retained_picks.append(picks[0]) + ev.picks = retained_picks + return tmp_catalog diff --git a/eqcorrscan/utils/clustering.py b/eqcorrscan/utils/clustering.py index e74fb50d4..2988c6689 100644 --- a/eqcorrscan/utils/clustering.py +++ b/eqcorrscan/utils/clustering.py @@ -10,6 +10,7 @@ """ import os import logging +import ctypes from multiprocessing import cpu_count import matplotlib.pyplot as plt @@ -23,6 +24,8 @@ from eqcorrscan.utils.correlate import ( get_array_xcorr, get_stream_xcorr, CorrelationError) from eqcorrscan.utils.pre_processing import _prep_data_for_correlation +from eqcorrscan.utils.libnames import _load_cdll + Logger = logging.getLogger(__name__) @@ -907,8 +910,6 @@ def remove_unclustered(catalog, distance_cutoff, num_threads=None): :returns: catalog :rtype: :class:`obspy.core.event.Catalog` """ - import ctypes - from eqcorrscan.utils.libnames import _load_cdll from math import radians utilslib = _load_cdll('libutils') @@ -974,9 +975,6 @@ def dist_mat_km(catalog, num_threads=None): :returns: distance matrix :rtype: :class:`numpy.ndarray` """ - import ctypes - from eqcorrscan.utils.libnames import _load_cdll - utilslib = _load_cdll('libutils') utilslib.distance_matrix.argtypes = [ diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py index 81f497d2b..513c41be3 100644 --- a/eqcorrscan/utils/correlate.py +++ b/eqcorrscan/utils/correlate.py @@ -28,10 +28,15 @@ import numpy as np import math + from packaging import version +from timeit import default_timer + +from obspy import UTCDateTime from eqcorrscan.utils.libnames import _load_cdll from eqcorrscan.utils import FMF_INSTALLED +from eqcorrscan.utils.pre_processing import _stream_quick_select Logger = logging.getLogger(__name__) @@ -353,8 +358,11 @@ def _zero_invalid_correlation_sums(cccsums, pad_dict, used_seed_ids): """ # TODO: This is potentially quite a slow way to do this. 
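The enforce_single_pick="earliest" branch above loops over stations and phase hints and keeps the earliest pick of each kind per event. An equivalent, slightly more compact formulation sorts the picks once and fills a dictionary keyed on (station, phase_hint); the sketch below assumes objects exposing pick.time, pick.phase_hint and pick.waveform_id.station_code, as ObsPy Pick objects do.

def earliest_pick_per_station_phase(picks):
    """Return one pick per (station, phase_hint): the earliest in time."""
    retained = {}
    # Sorting by time means the first pick seen for each key is the earliest.
    for pick in sorted(picks, key=lambda p: p.time):
        key = (pick.waveform_id.station_code, pick.phase_hint)
        retained.setdefault(key, pick)
    return list(retained.values())


# e.g. for an ObsPy event:
# event.picks = earliest_pick_per_station_phase(event.picks)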
for i, cccsum in enumerate(cccsums): - max_moveout = max(value[i] for key, value in pad_dict.items() - if key in used_seed_ids[i]) + moveouts = [value[i] for key, value in pad_dict.items() + if key in used_seed_ids[i]] + max_moveout = 0 + if len(moveouts): + max_moveout = max(moveouts) if max_moveout: cccsum[-max_moveout:] = 0.0 return cccsums @@ -741,23 +749,25 @@ def _fftw_stream_xcorr(templates, stream, stack=True, *args, **kwargs): list of list of tuples of station, channel for all cross-correlations. :rtype: list """ + array_dict_tuple = _get_array_dicts(templates, stream, stack=stack) + stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple + assert set(seed_ids) # number of threads: # default to using inner threads # if `cores` or `cores_outer` passed in then use that # else if OMP_NUM_THREADS set use that # otherwise use all available - num_cores_inner = kwargs.pop('cores', None) - if num_cores_inner is None: - num_cores_inner = int(os.getenv("OMP_NUM_THREADS", cpu_count())) + inner_kwargs = copy.copy(kwargs) + num_cores_inner, num_cores_outer = _set_inner_outer_threading( + inner_kwargs.pop('cores', None), + inner_kwargs.pop("cores_outer", None), + len(stream)) chans = [[] for _i in range(len(templates))] - array_dict_tuple = _get_array_dicts(templates, stream, stack=stack) - stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple - assert set(seed_ids) cccsums, tr_chans = fftw_multi_normxcorr( template_array=template_dict, stream_array=stream_dict, pad_array=pad_dict, seed_ids=seed_ids, cores_inner=num_cores_inner, - stack=stack, *args, **kwargs) + cores_outer=num_cores_outer, stack=stack, *args, **inner_kwargs) no_chans = np.sum(np.array(tr_chans).astype(int), axis=0) for seed_id, tr_chan in zip(seed_ids, tr_chans): for chan, state in zip(chans, tr_chan): @@ -770,8 +780,39 @@ def _fftw_stream_xcorr(templates, stream, stack=True, *args, **kwargs): return cccsums, no_chans, chans -def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, - cores_inner, stack=True, *args, **kwargs): +def _set_inner_outer_threading(num_cores_inner, num_cores_outer, n_chans): + max_threads = int(os.getenv("OMP_NUM_THREADS", cpu_count())) + if num_cores_inner is None: + num_cores_inner = max_threads + num_cores_outer = num_cores_outer or 1 + if num_cores_outer > 1: + if num_cores_outer > n_chans: + Logger.info( + "More outer cores than channels, setting to {0}".format( + n_chans)) + num_cores_outer = n_chans + if num_cores_outer * num_cores_inner > max_threads: + Logger.info("More threads requested than exist, falling back to " + "outer-loop parallelism") + num_cores_outer = min(max_threads, num_cores_outer) + if 2 * num_cores_outer < max_threads: + num_cores_inner = max_threads // num_cores_outer + else: + num_cores_inner = 1 + return num_cores_inner, num_cores_outer + + +def fftw_multi_normxcorr( + template_array, + stream_array, + pad_array, + seed_ids, + cores_inner, + cores_outer, + stack=True, + *args, + **kwargs +): """ Use a C loop rather than a Python loop - in some cases this will be fast. 
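_set_inner_outer_threading, added above, splits a total thread budget (OMP_NUM_THREADS if set, otherwise all CPUs) between the outer loop over channels and FFTW's inner threads. A stripped-down sketch of that budgeting decision is below; it assumes the caller has not also requested an explicit inner-thread count, which is the case the new code rebalances.

import os
from multiprocessing import cpu_count


def split_thread_budget(requested_outer, n_chans):
    """Split available threads between outer (per-channel) and inner (FFTW)
    parallelism, mirroring _set_inner_outer_threading when no explicit
    inner-thread count was requested."""
    max_threads = int(os.getenv("OMP_NUM_THREADS", cpu_count()))
    outer = min(requested_outer or 1, n_chans, max_threads)
    if outer > 1 and 2 * outer < max_threads:
        inner = max_threads // outer   # spare threads go to the inner loop
    elif outer > 1:
        inner = 1                      # the outer loop uses the whole budget
    else:
        inner = max_threads            # default: all threads on the inner loop
    return inner, outer


print(split_thread_budget(requested_outer=4, n_chans=10))

Outer-loop parallelism only pays off when there are enough channels and enough spare threads; otherwise the logic falls back to inner-only threading, which is why the default leaves cores_outer at 1.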
@@ -803,7 +844,7 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, flags='C_CONTIGUOUS'), np.ctypeslib.ndpointer(dtype=np.intc, flags='C_CONTIGUOUS'), - ctypes.c_int, + ctypes.c_int, ctypes.c_int, np.ctypeslib.ndpointer(dtype=np.intc, flags='C_CONTIGUOUS'), np.ctypeslib.ndpointer(dtype=np.intc, @@ -822,7 +863,8 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, fft-length used channels (stacked as per templates) pad array (stacked as per templates) - num thread inner + num threads inner + num threads outer variance warnings missed correlation warnings (usually due to gaps) stack option @@ -842,10 +884,12 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, n_channels = len(seed_ids) n_templates = template_array[seed_ids[0]].shape[0] image_len = stream_array[seed_ids[0]].shape[0] - # In testing, 2**13 consistently comes out fastest - setting to - # default. https://github.com/eqcorrscan/EQcorrscan/pull/285 - fft_len = kwargs.get( - "fft_len", min(2 ** 13, next_fast_len(template_len + image_len - 1))) + fft_len = kwargs.get("fft_len") + if fft_len is None: + # In testing, 2**13 consistently comes out fastest - setting to + # default. https://github.com/eqcorrscan/EQcorrscan/pull/285 + # But this results in lots of chunks - 2 ** 17 is also good. + fft_len = min(2 ** 17, next_fast_len(template_len + image_len - 1)) if fft_len < template_len: Logger.warning( f"FFT length of {fft_len} is shorter than the template, setting to" @@ -885,7 +929,8 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, ret = utilslib.multi_normxcorr_fftw( template_array, n_templates, template_len, n_channels, stream_array, image_len, cccs, fft_len, used_chans_np, pad_array_np, - cores_inner, variance_warnings, missed_correlations, int(stack)) + cores_inner, cores_outer, variance_warnings, missed_correlations, + int(stack)) if ret < 0: raise MemoryError("Memory allocation failed in correlation C-code") elif ret > 0: @@ -914,6 +959,16 @@ def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids, # ------------------------------- FastMatchedFilter Wrapper def _run_fmf_xcorr(template_arr, data_arr, weights, pads, arch, step=1): + template_arr, data_arr, multipliers = _fmf_stabilisation( + template_arr=template_arr, data_arr=data_arr) + + return _stabalised_fmf( + template_arr=template_arr, data_arr=data_arr, weights=weights, + pads=pads, arch=arch, multipliers=multipliers, step=step) + + +def _stabalised_fmf(template_arr, data_arr, weights, pads, arch, multipliers, + step): if not FMF_INSTALLED: raise ImportError("FastMatchedFilter is not available") import fast_matched_filter @@ -923,32 +978,47 @@ def _run_fmf_xcorr(template_arr, data_arr, weights, pads, arch, step=1): else: raise ImportError(f"FMF version {fast_matched_filter.__version__} " f"must be >= {MIN_FMF_VERSION}") - # Demean - template_arr -= template_arr.mean(axis=-1, keepdims=True) - data_arr -= data_arr.mean(axis=-1, keepdims=True) - - multipliers = [] - for x in range(data_arr.shape[0]): - # Check that stream is non-zero and above variance threshold - if not np.all(data_arr[x] == 0) and np.var(data_arr[x]) < 1e-8: - # Apply gain - data_arr[x] *= MULTIPLIER - Logger.warning(f"Low variance found for {x}, applying gain " - "to stabilise correlations") - multipliers.append(MULTIPLIER) - else: - multipliers.append(1) + Logger.info("Handing off to FMF") cccsums = fmf( templates=template_arr, weights=weights, moveouts=pads, data=data_arr, 
step=step, arch=arch, normalize="full") + Logger.info("FMF returned") # Remove gain - for x in range(data_arr.shape[0]): - data_arr[x] *= multipliers[x] - + if np.any(multipliers != 1): + data_arr /= multipliers return cccsums +def _fmf_stabilisation(template_arr, data_arr): + """ FMF doesn't do the internal check that EQC correlations do. """ + # Demean + tic = default_timer() + template_arr -= template_arr.mean(axis=-1, keepdims=True) + data_arr -= data_arr.mean(axis=-1, keepdims=True) + toc = default_timer() + Logger.info(f"Removing mean took {toc - tic:.4f} s") + + # Stability checking + tic = default_timer() + # var is fairly slow, var = mean(abs(a - a.mean()) ** 2) - mean is zero, + # so we can skip a step + stability_issues = np.logical_and( + # data_arr.var(axis=1, keepdims=True) < 1e-8, + np.mean(np.abs(data_arr) ** 2, axis=1, keepdims=True) < 1e-8, + ~np.all(data_arr == 0, axis=1, keepdims=True)) + multipliers = np.ones_like(stability_issues, dtype=float) + multipliers[stability_issues] = MULTIPLIER + if np.any(stability_issues): + Logger.warning( + f"Low variance found for channels {np.where(stability_issues)}," + f"applying gain to stabilise correlations") + data_arr *= multipliers + toc = default_timer() + Logger.info(f"Checking stability took {toc - tic:.4f} s") + return template_arr, data_arr, multipliers + + @register_array_xcorr("fmf") def fmf_xcorr(templates, stream, pads, arch="precise", *args, **kwargs): """ @@ -1009,6 +1079,22 @@ def _fmf_cpu(templates, stream, *args, **kwargs): return _fmf_multi_xcorr(templates, stream, arch="precise") +def _fmf_reshape(template_dict, stream_dict, pad_dict, seed_ids): + tic = default_timer() + # Reshape templates into [templates x traces x time] + t_arr = np.array([template_dict[seed_id] + for seed_id in seed_ids]).swapaxes(0, 1) + # Reshape stream into [traces x time] + d_arr = np.array([stream_dict[seed_id] for seed_id in seed_ids]) + # Moveouts should be [templates x traces] + pads = np.array([pad_dict[seed_id] for seed_id in seed_ids]).swapaxes(0, 1) + # Weights should be shaped like pads + weights = np.ones_like(pads) + toc = default_timer() + Logger.info(f"Reshaping for FMF took {toc - tic:.4f} s") + return t_arr, d_arr, weights, pads + + def _fmf_multi_xcorr(templates, stream, *args, **kwargs): """ Apply FastMatchedFilter routine concurrently. 
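_fmf_stabilisation, added above, relies on the fact that once a channel has been demeaned its variance is simply mean(|x|**2), flags channels whose variance is below 1e-8 but which are not flat zero, and scales them up so FMF's normalisation does not lose precision (the gain is divided back out of the data afterwards). The core of that check on a channels-by-samples array looks roughly like the sketch below; MULTIPLIER is just a large illustrative constant here, correlate.py defines its own.

import numpy as np

MULTIPLIER = 1e8   # illustrative only; correlate.py defines its own constant


def stabilise(data_arr):
    """Demean each channel, then apply a gain to low-variance, non-zero
    channels; returns the scaled data and the per-channel gains."""
    data_arr = data_arr - data_arr.mean(axis=-1, keepdims=True)
    # After demeaning, variance is just mean(|x| ** 2) - cheaper than np.var
    low_power = np.mean(np.abs(data_arr) ** 2, axis=1, keepdims=True) < 1e-8
    non_zero = ~np.all(data_arr == 0, axis=1, keepdims=True)
    multipliers = np.ones_like(low_power, dtype=float)
    multipliers[np.logical_and(low_power, non_zero)] = MULTIPLIER
    return data_arr * multipliers, multipliers


data = np.vstack([np.random.randn(100), 1e-6 * np.random.randn(100)])
scaled, gains = stabilise(data)
print(gains.ravel())   # the quiet second channel gets the gain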
@@ -1045,15 +1131,9 @@ def _fmf_multi_xcorr(templates, stream, *args, **kwargs): stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple assert set(seed_ids) - # Reshape templates into [templates x traces x time] - t_arr = np.array([template_dict[seed_id] - for seed_id in seed_ids]).swapaxes(0, 1) - # Reshape stream into [traces x time] - d_arr = np.array([stream_dict[seed_id] for seed_id in seed_ids]) - # Moveouts should be [templates x traces] - pads = np.array([pad_dict[seed_id] for seed_id in seed_ids]).swapaxes(0, 1) - # Weights should be shaped like pads - weights = np.ones_like(pads) + t_arr, d_arr, weights, pads = _fmf_reshape( + template_dict=template_dict, stream_dict=stream_dict, + pad_dict=pad_dict, seed_ids=seed_ids) cccsums = _run_fmf_xcorr( template_arr=t_arr, weights=weights, pads=pads, @@ -1106,8 +1186,9 @@ def get_stream_xcorr(name_or_func=None, concurrency=None): # --------------------------- stream prep functions -def _get_array_dicts(templates, stream, stack, copy_streams=True): +def _get_array_dicts(templates, stream, stack, *args, **kwargs): """ prepare templates and stream, return dicts """ + tic = default_timer() # Do some reshaping # init empty structures for data storage template_dict = {} @@ -1118,16 +1199,18 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): stream.sort(['network', 'station', 'location', 'channel']) for template in templates: template.sort(['network', 'station', 'location', 'channel']) - t_starts.append(min([tr.stats.starttime for tr in template])) + t_starts.append( + UTCDateTime(ns=min([tr.stats.starttime.__dict__['_UTCDateTime__ns'] + for tr in template]))) stream_start = min([tr.stats.starttime for tr in stream]) # get seed ids, make sure these are collected on sorted streams seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])] # pull common channels out of streams and templates and put in dicts for i, seed_id in enumerate(seed_ids): - temps_with_seed = [template[i].data for template in templates] + temps_with_seed = [template.traces[i].data for template in templates] t_ar = np.array(temps_with_seed).astype(np.float32) template_dict.update({seed_id: t_ar}) - stream_channel = stream.select(id=seed_id.split('_')[0])[0] + stream_channel = _stream_quick_select(stream, seed_id.split('_')[0])[0] # Normalize data to ensure no float overflow stream_data = stream_channel.data / (np.max( np.abs(stream_channel.data)) / 1e5) @@ -1138,16 +1221,22 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): # pad_list can become 0. 0-1 = -1; which is problematic. 
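A small detail in _get_array_dicts above: each stream channel is rescaled so that its peak absolute amplitude becomes 1e5 before the float32 cast, which keeps the correlation sums away from float32 overflow without changing the normalised correlations (normalised cross-correlation is scale invariant). The operation itself is a one-liner, sketched here on synthetic data:

import numpy as np

data = np.random.randn(720000) * 1e9               # raw counts can be very large
scaled = data / (np.max(np.abs(data)) / 1e5)       # peak amplitude is now 1e5
scaled = np.ascontiguousarray(scaled, dtype=np.float32)
print(np.max(np.abs(scaled)))                      # ~100000.0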
stream_offset = int( math.floor(stream_channel.stats.sampling_rate * - (stream_channel.stats.starttime - stream_start))) + (stream_channel.stats.starttime - stream_start))) if stack: pad_list = [ - int(round(template[i].stats.sampling_rate * - (template[i].stats.starttime - - t_starts[j]))) - stream_offset - for j, template in zip(range(len(templates)), templates)] + int(round( + template.traces[i].stats.__dict__['sampling_rate'] * + (template.traces[i].stats.starttime.__dict__[ + '_UTCDateTime__ns'] - + t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) - + stream_offset + for j, template in enumerate(templates)] else: - pad_list = [0 for _ in range(len(templates))] + pad_list = [0 for _ in templates] pad_dict.update({seed_id: pad_list}) + toc = default_timer() + Logger.info(f"Making array dicts for {len(seed_ids)} seed ids " + f"took {toc - tic:.4f} s") return stream_dict, template_dict, pad_dict, seed_ids diff --git a/eqcorrscan/utils/findpeaks.py b/eqcorrscan/utils/findpeaks.py index 58174fd51..84f5f60e5 100644 --- a/eqcorrscan/utils/findpeaks.py +++ b/eqcorrscan/utils/findpeaks.py @@ -13,6 +13,7 @@ import numpy as np from multiprocessing import Pool, cpu_count +from concurrent.futures import ThreadPoolExecutor from scipy import ndimage from eqcorrscan.utils.correlate import pool_boy @@ -89,14 +90,13 @@ def find_peaks_compiled(arr, thresh, trig_int, full_peaks=False): else: peak_vals = arr peak_indices = np.arange(arr.shape[0]) + peaks = [] if len(peak_vals) > 0: peaks = decluster( peaks=np.array(peak_vals), index=np.array(peak_indices), trig_int=trig_int + 1, threshold=thresh) peaks = sorted(peaks, key=lambda peak: peak[1], reverse=False) - return peaks - else: - return [] + return peaks def find_peaks2_short(arr, thresh, trig_int, full_peaks=False): @@ -142,6 +142,7 @@ def find_peaks2_short(arr, thresh, trig_int, full_peaks=False): Logger.debug("No values over threshold {0}".format(thresh)) return [] if np.all(np.abs(arr) > thresh): + Logger.debug("All values above threshold, running full peak finding") full_peaks = True Logger.debug('Found {0} samples above the threshold'.format( len(image[image > thresh]))) @@ -233,170 +234,18 @@ def multi_find_peaks(arr, thresh, trig_int, parallel=True, full_peaks=False, pool.apply_async(internal_func, param) for param in params] peaks = [res.get() for res in results] else: - peaks = _multi_find_peaks_compiled( - arr, thresh, trig_int, full_peaks=full_peaks, cores=cores) + to_run = ((arr[i], thresh[i], trig_int) + for i in range(len(thresh))) + with ThreadPoolExecutor(cores) as executor: + results = executor.map( + lambda args: find_peaks_compiled(*args), to_run) + peaks = [r for r in results] return peaks -def _multi_find_peaks_compiled(arrays, thresholds, trig_int, full_peaks, - cores): - """ - Determine peaks in an array or arrays of data above a certain threshold. - - :type arrays: numpy.ndarray - :param arrays: 2-D numpy array is required - :type thresholds: list - :param thresholds: - Minimum value for peaks. - :type trig_int: int - :param trig_int: - The minimum difference in samples between triggers, if multiple - peaks within this window this code will find the highest. - :type full_peaks: bool - :param full_peaks: - If True, will decluster within data-sections above the threshold, - rather than just taking the peak within that section. This will take - more time. This defaults to False for match_filter. 
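multi_find_peaks now fans the per-channel peak search out over a ThreadPoolExecutor, one (array, threshold) job per channel, instead of the removed _multi_find_peaks_compiled path. The pattern is ordinary concurrent.futures usage; the sketch below substitutes a trivial threshold-crossing finder for find_peaks_compiled, which in the real code hands off to the compiled declustering routine (and it is presumably that GIL-releasing ctypes call which makes threads, rather than processes, worthwhile here).

import numpy as np
from concurrent.futures import ThreadPoolExecutor


def simple_find_peaks(arr, thresh):
    """Stand-in for find_peaks_compiled: values and indices above |thresh|."""
    idx = np.nonzero(np.abs(arr) > thresh)[0]
    return list(zip(arr[idx], idx))


def threaded_multi_find(arrays, thresholds, cores=4):
    # One (array, threshold) job per channel, mapped across a thread pool.
    to_run = ((arrays[i], thresholds[i]) for i in range(len(thresholds)))
    with ThreadPoolExecutor(cores) as executor:
        results = executor.map(lambda args: simple_find_peaks(*args), to_run)
    return list(results)


arrays = np.random.randn(3, 1000)
print([len(p) for p in threaded_multi_find(arrays, [3.0, 3.0, 3.0])])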
- :type cores: int - :param cores: Number of threads to parallel across - - :return: peaks: List of List of tuples of peak values and locations. - :rtype: list - """ - if not full_peaks: - peak_vals, peak_indices = _multi_find_peaks_c( - arrays=arrays, thresholds=thresholds, threads=cores) - # Remove empty arrays - peak_mapper = {} - map_index = 0 - _peak_vals = [] - _peak_indices = [] - _thresholds = [] - for i in range(arrays.shape[0]): - if len(peak_vals[i]) > 0: - peak_mapper.update({i: map_index}) - _peak_vals.append(peak_vals[i]) - _peak_indices.append(peak_indices[i]) - _thresholds.append(thresholds[i]) - map_index += 1 - peak_vals = _peak_vals - peak_indices = _peak_indices - thresholds = _thresholds - else: - peak_vals = arrays - peak_indices = [np.arange(arr.shape[0]) for arr in arrays] - peak_mapper = {i: i for i in range(len(peak_indices))} - if len(peak_indices) > 0: - peaks = _multi_decluster( - peaks=peak_vals, indices=peak_indices, trig_int=trig_int, - thresholds=thresholds, cores=cores) - peaks = [sorted(_peaks, key=lambda peak: peak[1], reverse=False) - for _peaks in peaks] - out_peaks = [] - for i in range(arrays.shape[0]): - if i in peak_mapper.keys(): - out_peaks.append(peaks[peak_mapper[i]]) - else: - out_peaks.append([]) - return out_peaks - - -def _multi_decluster(peaks, indices, trig_int, thresholds, cores): - """ - Decluster peaks based on an enforced minimum separation. - - Only works when peaks and indices are all the same shape. - - :type peaks: list - :param peaks: list of arrays of peak values - :type indices: list - :param indices: list of arrays of locations of peaks - :type trig_int: int - :param trig_int: Minimum trigger interval in samples - :type thresholds: list - :param thresholds: list of float of threshold values - - :return: list of lists of tuples of (value, sample) - """ - utilslib = _load_cdll('libutils') - - lengths = np.array([peak.shape[0] for peak in peaks], dtype=int) - trig_int = int(trig_int) - n = np.int32(len(peaks)) - cores = min(cores, n) - - total_length = lengths.sum() - - max_indexes = [_indices.max() for _indices in indices] - max_index = max(max_indexes) - for var in [trig_int, lengths.max(), max_index]: - if var == ctypes.c_long(var).value: - long_type = ctypes.c_long - func = utilslib.multi_decluster - elif var == ctypes.c_longlong(var).value: - long_type = ctypes.c_longlong - func = utilslib.multi_decluster_ll - else: - # Note, could use numpy.gcd to try and find greatest common - # divisor and make numbers smaller - raise OverflowError("Maximum index larger than internal long long") - - func.argtypes = [ - np.ctypeslib.ndpointer(dtype=np.float32, shape=(total_length,), - flags='C_CONTIGUOUS'), - np.ctypeslib.ndpointer(dtype=long_type, shape=(total_length,), - flags='C_CONTIGUOUS'), - np.ctypeslib.ndpointer(dtype=long_type, shape=(n,), - flags='C_CONTIGUOUS'), - ctypes.c_int, - np.ctypeslib.ndpointer(dtype=np.float32, shape=(n,), - flags='C_CONTIGUOUS'), - long_type, - np.ctypeslib.ndpointer(dtype=np.uint32, shape=(total_length,), - flags='C_CONTIGUOUS'), - ctypes.c_int] - func.restype = ctypes.c_int - - peaks_sorted = np.empty(total_length, dtype=np.float32) - indices_sorted = np.empty_like(peaks_sorted, dtype=np.float32) - - # TODO: When doing full decluster from match-filter, all lengths will be - # TODO: the same - would be more efficient to use numpy sort on 2D matrix - start_ind = 0 - end_ind = 0 - for _peaks, _indices, length in zip(peaks, indices, lengths): - end_ind += length - sorted_indices = 
np.abs(_peaks).argsort() - peaks_sorted[start_ind: end_ind] = _peaks[sorted_indices[::-1]] - indices_sorted[start_ind: end_ind] = _indices[sorted_indices[::-1]] - start_ind += length - - peaks_sorted = np.ascontiguousarray(peaks_sorted, dtype=np.float32) - indices_sorted = np.ascontiguousarray( - indices_sorted, dtype=long_type) - lengths = np.ascontiguousarray(lengths, dtype=long_type) - thresholds = np.ascontiguousarray(thresholds, dtype=np.float32) - out = np.zeros(total_length, dtype=np.uint32) - ret = func( - peaks_sorted, indices_sorted, lengths, np.int32(n), thresholds, - long_type(trig_int + 1), out, np.int32(cores)) - if ret != 0: - raise MemoryError("Issue with c-routine, returned %i" % ret) - - peaks_out = [] - slice_start = 0 - for length in lengths: - slice_end = slice_start + length - out_mask = out[slice_start: slice_end].astype(bool) - declustered_peaks = peaks_sorted[slice_start: slice_end][out_mask] - declustered_indices = indices_sorted[slice_start: slice_end][out_mask] - peaks_out.append(list(zip(declustered_peaks, declustered_indices))) - slice_start = slice_end - return peaks_out - - def decluster_distance_time(peaks, index, trig_int, catalog, - hypocentral_separation, threshold=0): + hypocentral_separation, threshold=0, + num_threads=None): """ Decluster based on time between peaks, and distance between events. @@ -419,11 +268,13 @@ def decluster_distance_time(peaks, index, trig_int, catalog, Maximum inter-event distance to decluster over in km :type threshold: float :param threshold: Minimum absolute peak value to retain it + :type num_threads: int + :param num_threads: + Number of threads to use for distance matrix calculation. :return: list of tuples of (value, sample) """ - utilslib = _load_cdll('libutils') - + utilslib = _load_cdll("libutils") length = peaks.shape[0] trig_int = int(trig_int) @@ -454,7 +305,8 @@ def decluster_distance_time(peaks, index, trig_int, catalog, arr = peaks[sorted_inds[::-1]] inds = index[sorted_inds[::-1]] sorted_events = [catalog[i] for i in sorted_inds[::-1]] - distance_matrix = dist_mat_km(catalog=sorted_events) + distance_matrix = dist_mat_km( + catalog=sorted_events, num_threads=num_threads) arr = np.ascontiguousarray(arr, dtype=np.float32) inds = np.ascontiguousarray(inds, dtype=long_type) @@ -487,8 +339,7 @@ def decluster(peaks, index, trig_int, threshold=0): :return: list of tuples of (value, sample) """ - utilslib = _load_cdll('libutils') - + utilslib = _load_cdll("libutils") length = peaks.shape[0] trig_int = int(trig_int) @@ -524,7 +375,6 @@ def decluster(peaks, index, trig_int, threshold=0): long_type(trig_int), out) if ret != 0: raise MemoryError("Issue with c-routine, returned %i" % ret) - peaks_out = list(zip(arr[out.astype(bool)], inds[out.astype(bool)])) return peaks_out @@ -533,8 +383,7 @@ def _find_peaks_c(array, threshold): """ Use a C func to find peaks in the array. 
""" - utilslib = _load_cdll('libutils') - + utilslib = _load_cdll("libutils") length = array.shape[0] utilslib.find_peaks.argtypes = [ np.ctypeslib.ndpointer(dtype=np.float32, shape=(length, ), @@ -554,44 +403,6 @@ def _find_peaks_c(array, threshold): return array[peaks_locations], peaks_locations[0] -def _multi_find_peaks_c(arrays, thresholds, threads): - """ - Wrapper for multi-find peaks C-func - """ - utilslib = _load_cdll('libutils') - - length = arrays.shape[1] - n = np.int32(arrays.shape[0]) - thresholds = np.ascontiguousarray(thresholds, np.float32) - arr = np.ascontiguousarray(arrays.flatten(), np.float32) - utilslib.multi_find_peaks.argtypes = [ - np.ctypeslib.ndpointer(dtype=np.float32, shape=(n * length,), - flags='C_CONTIGUOUS'), - ctypes.c_long, ctypes.c_int, - np.ctypeslib.ndpointer(dtype=np.float32, shape=(n, ), - flags='C_CONTIGUOUS'), - ctypes.c_int, - np.ctypeslib.ndpointer(dtype=np.uint32, shape=(n * length, ), - flags='C_CONTIGUOUS')] - utilslib.multi_find_peaks.restype = ctypes.c_int - - out = np.ascontiguousarray(np.zeros((n * length, ), dtype=np.uint32)) - ret = utilslib.multi_find_peaks( - arr, ctypes.c_long(length), n, thresholds, threads, out) - # Copy data to avoid farking the users data - if ret != 0: - raise MemoryError("Internal error") - peaks = [] - peak_locations = [] - out = out.reshape(n, length) - for i in range(n): - peak_locs = np.nonzero(out[i]) - peaks.append(arrays[i][peak_locs]) - peak_locations.append(peak_locs[0]) - - return peaks, peak_locations - - def coin_trig(peaks, stachans, samp_rate, moveout, min_trig, trig_int): """ Find network coincidence triggers within peaks of detection statistics. diff --git a/eqcorrscan/utils/libnames.py b/eqcorrscan/utils/libnames.py index adc9576d9..db6a739e1 100644 --- a/eqcorrscan/utils/libnames.py +++ b/eqcorrscan/utils/libnames.py @@ -49,7 +49,7 @@ def _load_cdll(name): errs.append(msg) Logger.debug(msg) else: - Logger.info(f"Loaded library from {libpath}") + Logger.debug(f"Loaded library from {libpath}") return cdll raise ImportError( "Could not load shared library {0} due to " diff --git a/eqcorrscan/utils/plotting.py b/eqcorrscan/utils/plotting.py index 4681cc2db..ce58726ef 100644 --- a/eqcorrscan/utils/plotting.py +++ b/eqcorrscan/utils/plotting.py @@ -321,10 +321,10 @@ def peaks_plot(data, starttime, samp_rate, peaks=None, **kwargs): fig = plt.figure() ax1 = fig.add_subplot(111) ax1.plot(t, data, 'k') - ax1.scatter(peaks[0][1] / (samp_rate * 3600), abs(peaks[0][0]), + ax1.scatter(peaks[0][1] / (samp_rate * 3600), peaks[0][0], color='r', label='Peaks') for peak in peaks: - ax1.scatter(peak[1] / (samp_rate * 3600), abs(peak[0]), color='r') + ax1.scatter(peak[1] / (samp_rate * 3600), peak[0], color='r') ax1.legend() ax1.set_xlabel("Time after %s [hr]" % starttime.isoformat()) ax1.axis('tight') @@ -1274,7 +1274,7 @@ def noise_plot(signal, noise, normalise=False, **kwargs): axes[-2].set_xlabel("Frequency (Hz)") axes[0].set_title("Spectra") axes[1].set_title("Signal - noise") - fig.legend(lines, labels, 'upper left') + fig.legend(lines, labels, loc='upper left') fig.subplots_adjust(hspace=0, top=0.91) fig = _finalise_figure(fig=fig, **kwargs) # pragma: no cover return fig @@ -1954,7 +1954,6 @@ def spec_trace(traces, cmap=None, wlen=0.4, log=False, trc='k', tralpha=0.9, ax = fig.add_subplot(len(traces), 1, i + 1, sharex=ax) ax1, ax2 = _spec_trace(tr, cmap=cmap, wlen=wlen, log=log, trc=trc, tralpha=tralpha, axes=ax) - ax.set_yticks([]) if i < len(traces) - 1: plt.setp(ax1.get_xticklabels(), visible=False) if 
isinstance(traces, list): diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index e0f515ff3..5c4489ea3 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -9,30 +9,35 @@ GNU Lesser General Public License, Version 3 (https://www.gnu.org/copyleft/lesser.html) """ +import os + import numpy as np import logging -import datetime as dt +import copy -from collections import Counter -from multiprocessing import Pool, cpu_count +from collections import Counter, defaultdict +from concurrent.futures import ThreadPoolExecutor +from functools import lru_cache, partial +from scipy.signal import iirfilter, sosfilt, zpk2sos from obspy import Stream, Trace, UTCDateTime from obspy.core.trace import Stats -from obspy.signal.filter import bandpass, lowpass, highpass Logger = logging.getLogger(__name__) -def _check_daylong(tr): +def _check_daylong(data, threshold=0.5): """ - Check the data quality of the daylong file. + Check data continuity. - Check to see that the day isn't just zeros, with large steps, if it is + Check to see that the day is more than threshold of zeros, if it is then the resampling will hate it. - :type tr: obspy.core.trace.Trace - :param tr: Trace to check if the data are daylong. + :type data: np.array + :param data: Data from Trace to check if the data are okay. + :type threshold: float + :param threshold: Fraction of data to accept as zeros. :return quality (simply good or bad) :rtype: bool @@ -47,133 +52,173 @@ def _check_daylong(tr): >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data' >>> st = read(TEST_PATH + '/WAV/TEST_/' + ... '2013-09-01-0410-35.DFDPC_024_00') - >>> _check_daylong(st[0]) + >>> _check_daylong(st[0].data) True + >>> zeroed_data = st[0].copy().data + >>> zeroed_data[0:9100] = np.zeros(9100) + >>> _check_daylong(zeroed_data) + False """ - if len(np.nonzero(tr.data)[0]) < 0.5 * len(tr.data): - qual = False - else: - qual = True + return np.nonzero(data)[0].shape[0] >= threshold * data.shape[0] + + +def _simple_qc(st, max_workers=None, chunksize=1): + """ + Multithreaded simple QC of data. + + :param st: Stream of data to check + :type st: obspy.core.Stream + :param max_workers: Maximum number of threads to use + :type max_workers: int + :param chunksize: Number of traces to process per thread + :type chunksize: int + + :return: dict of {tr.id: quality} where quality is bool + """ + qual = dict() + with ThreadPoolExecutor(max_workers) as executor: + for tr, _qual in zip(st, executor.map( + _check_daylong, (tr.data for tr in st), chunksize=chunksize)): + qual[tr.id] = _qual return qual -def shortproc(st, lowcut, highcut, filt_order, samp_rate, parallel=False, - num_cores=False, starttime=None, endtime=None, - seisan_chan_names=False, fill_gaps=True, ignore_length=False, - ignore_bad_data=False, fft_threads=1): +def _sanitize_length(st, starttime=None, endtime=None, daylong=False): + """ + Check length and work out start, end, length and trimming criteria + + :param st: Stream to check + :type st: obspy.core.Stream + :param starttime: Desired starttime - if None, will be evaluated from data + :type starttime: obspy.core.UTCDateTime + :param endtime: DEsired endtime - can be None + :type endtime: obspy.core.UTCDateTime + :param daylong: Whether data should be one-day long. + :type daylong: bool + + :return: obspy.core.Stream, length[float], clip[bool], + starttime[obspy.core.UTCDateTime] """ - Basic function to bandpass and downsample. 
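The reworked _check_daylong above now takes a bare data array plus a threshold and simply asks whether at least that fraction of samples is non-zero, and _simple_qc maps the check across a Stream with a thread pool. The check itself is one line of numpy, sketched here with synthetic data:

import numpy as np

def check_continuity(data, threshold=0.5):
    """True if at least `threshold` of the samples are non-zero."""
    return np.nonzero(data)[0].shape[0] >= threshold * data.shape[0]

data = np.random.randn(86400)
print(check_continuity(data))    # True - no zeros
data[:50000] = 0.0
print(check_continuity(data))    # False - more than half the samples are zero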
+ length, clip = None, False + + if daylong: + length, clip = 86400, True + # Set the start-time to a day start - cope with + if starttime is None: + startdates = [] + for tr in st: + if abs(tr.stats.starttime - (UTCDateTime( + tr.stats.starttime.date) + 86400)) < tr.stats.delta: + # If the trace starts within 1 sample of the next day, + # use the next day as the startdate + startdates.append((tr.stats.starttime + 86400).date) + Logger.warning( + f'{tr.id} starts within 1 sample of the next day, ' + f'using this time {(tr.stats.starttime + 86400).date}') + else: + startdates.append(tr.stats.starttime.date) + # Check that all traces start on the same date... + if not len(set(startdates)) == 1: + raise NotImplementedError('Traces start on different days') + starttime = UTCDateTime(startdates[0]) + else: + if starttime is not None and endtime is not None: + for tr in st: + Logger.info( + f"Trimming {tr.id} between {starttime} and {endtime}") + tr.trim(starttime, endtime) + if len(tr.data) == ((endtime - starttime) * + tr.stats.sampling_rate) + 1: + Logger.info(f"{tr.id} is overlength dropping first sample") + tr.data = tr.data[1:len(tr.data)] + # TODO: this should adjust the start-time + # tr.stats.starttime += tr.stats.delta + length = endtime - starttime + clip = True + elif starttime: + for tr in st: + tr.trim(starttime=starttime) + elif endtime: + for tr in st: + tr.trim(endtime=endtime) + return st, length, clip, starttime + + +@lru_cache(maxsize=5) +def _get_window(window, npts): + """ Get window for resampling stabilisation. """ + from scipy.signal import get_window + return np.fft.ifftshift(get_window(window, npts)) - Works in place on data. This is employed to ensure all parts of the - data are processed in the same way. - :type st: obspy.core.stream.Stream +def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, + num_cores=False, starttime=None, endtime=None, + daylong=False, seisan_chan_names=False, fill_gaps=True, + ignore_length=False, ignore_bad_data=False): + """ + Apply standardised processing workflow to data for matched-filtering + + Steps: + + #. Check length and continuity of data meets user-defined criteria + #. Fill remaining gaps in data with zeros and record gap positions + #. Detrend data (using a simple linear detrend to set start and + end to 0) + #. Pad data to length + #. Resample in the frequency domain + #. Detrend data (using a simple linear detrend to set start and + end to 0) + #. Zerophase Butterworth filter + #. Re-check length + #. Re-apply zero-padding to gap locations recording in step 2 to remove + filtering and resampling artefacts + :param st: Stream to process + :type st: obspy.core.Stream + :param lowcut: + Lowcut of butterworth filter in Hz. If set to None and highcut is + given a highpass filter will be applied. If both lowcut and highcut + are given, a bandpass filter will be applied. If lowcut and highcut + are both None, no filtering will be applied. :type lowcut: float - :param lowcut: Low cut for bandpass in Hz + :param highcut: + Highcut of butterworth filter in Hz. If set to None and lowcut is + given a lowpass filter will be applied. If both lowcut and highcut + are given, a bandpass filter will be applied. If lowcut and highcut + are both None, no filtering will be applied. 
:type highcut: float - :param highcut: High cut for bandpass in Hz + :param filt_order: Filter order :type filt_order: int - :param filt_order: Number of corners for bandpass filter + :param samp_rate: Desired sample rate of output data in Hz :type samp_rate: float - :param samp_rate: Sampling rate desired in Hz + :param parallel: Whether to process data in parallel (uses multi-threading) :type parallel: bool - :param parallel: - Set to True to process traces in parallel, for small numbers of traces - this is often slower than serial processing, defaults to False + :param num_cores: Maximum number of cores to use for parallel processing :type num_cores: int - :param num_cores: - Control the number of cores for parallel processing, if set to False - then this will use all the cores available. - :type starttime: obspy.core.utcdatetime.UTCDateTime - :param starttime: - Desired data start time, will trim to this before processing - :type endtime: obspy.core.utcdatetime.UTCDateTime - :param endtime: - Desired data end time, will trim to this before processing - :type seisan_chan_names: bool + :param starttime: Desired starttime of data + :type starttime: obspy.core.UTCDateTime + :param endtime: Desired endtime of data + :type endtime: obspy.core.UTCDateTime + :param daylong: + Whether data should be considered to be one-day long. Setting this will + assume that your data should start as close to the start of a day + as possible given the sampling. + :type daylong: bool :param seisan_chan_names: - Whether channels are named like seisan channels (which are two letters - rather than SEED convention of three) - defaults to True. + Whether to convert channel names to two-char seisan channel names + :type seisan_chan_names: bool + :param fill_gaps: Whether to fill-gaps in the data :type fill_gaps: bool - :param fill_gaps: Whether to pad any gaps found with zeros or not. - :type ignore_length: bool :param ignore_length: - Whether to allow data that are less than 80% of the requested length. - Defaults to False which will error if short data are found. + Whether to ignore data that are not long enough. + :type ignore_length: bool + :param ignore_bad_data: Whether to ignore data that are excessively gappy :type ignore_bad_data: bool - :param ignore_bad_data: - If False (default), errors will be raised if data are excessively - gappy or are mostly zeros. If True then no error will be raised, but - an empty trace will be returned. - :type fft_threads: int - :param fft_threads: - Number of threads to use for pyFFTW FFT in resampling. Note that it - is not recommended to use fft_threads > 1 and num_cores > 1. - - - :return: Processed stream - :rtype: :class:`obspy.core.stream.Stream` - - .. note:: - If your data contain gaps you should *NOT* fill those gaps before - using the pre-process functions. The pre-process functions will fill - the gaps internally prior to processing, process the data, then re-fill - the gaps with zeros to ensure correlations are not incorrectly - calculated within gaps. If your data have gaps you should pass a merged - stream without the `fill_value` argument (e.g.: `st = st.merge()`). - - .. warning:: - If you intend to use this for processing templates you should consider - how resampling will impact your cross-correlations. Minor differences - in resampling between day-long files (which you are likely to use for - continuous detection) and shorter files will reduce your - cross-correlations! - - .. 
rubric:: Example, bandpass - - >>> from obspy import read - >>> from eqcorrscan.utils.pre_processing import shortproc - >>> # Get the path to the test data - >>> import eqcorrscan - >>> import os - >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data' - >>> st = read(TEST_PATH + '/WAV/TEST_/2013-09-01-0410-35.DFDPC_024_00') - >>> st = shortproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20, - ... parallel=True, num_cores=2) - >>> print(st[0]) - AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \ -| 20.0 Hz, 1800 samples - .. rubric:: Example, low-pass + :return: Processed stream as obspy.core.Stream - >>> from obspy import read - >>> from eqcorrscan.utils.pre_processing import shortproc - >>> # Get the path to the test data - >>> import eqcorrscan - >>> import os - >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data' - >>> st = read(TEST_PATH + '/WAV/TEST_/2013-09-01-0410-35.DFDPC_024_00') - >>> st = shortproc(st=st, lowcut=None, highcut=9, filt_order=3, - ... samp_rate=20) - >>> print(st[0]) - AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \ -| 20.0 Hz, 1800 samples - - .. rubric:: Example, high-pass - - >>> from obspy import read - >>> from eqcorrscan.utils.pre_processing import shortproc - >>> # Get the path to the test data - >>> import eqcorrscan - >>> import os - >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data' - >>> st = read(TEST_PATH + '/WAV/TEST_/2013-09-01-0410-35.DFDPC_024_00') - >>> st = shortproc(st=st, lowcut=2, highcut=None, filt_order=3, - ... samp_rate=20) - >>> print(st[0]) - AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \ -| 20.0 Hz, 1800 samples + :Note: Works in place on your data, copy before giving to this function if + you want to reuse your input data. 
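For reference, a minimal sketch of calling the new multi_process entry point (illustrative only, not part of the patch; the ObsPy demo stream stands in for real data and the filter settings mirror the removed shortproc doctests):

# Illustrative only: any merged Stream works the same way.
from obspy import read
from eqcorrscan.utils.pre_processing import multi_process

st = read()  # three 100 Hz demo traces as stand-in data
st = multi_process(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
                   parallel=True, num_cores=2)
# For day-long continuous data, pass daylong=True (and optionally starttime);
# multi_process then clips or pads to 86400 s, as dayproc previously did.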
""" if isinstance(st, Trace): tracein = True @@ -182,494 +227,495 @@ def shortproc(st, lowcut, highcut, filt_order, samp_rate, parallel=False, tracein = False # Add sanity check for filter if highcut and highcut >= 0.5 * samp_rate: - raise IOError('Highcut must be lower than the nyquist') - length = None - clip = False - if starttime is not None and endtime is not None: - for tr in st: - tr.trim(starttime, endtime) - if len(tr.data) == ((endtime - starttime) * - tr.stats.sampling_rate) + 1: - tr.data = tr.data[1:len(tr.data)] - length = endtime - starttime - clip = True - elif starttime: - for tr in st: - tr.trim(starttime=starttime) - elif endtime: - for tr in st: - tr.trim(endtime=endtime) - for tr in st: - if len(tr.data) == 0: - st.remove(tr) - Logger.warning('No data for {0} after trim'.format(tr.id)) + raise IOError('Highcut must be lower than the Nyquist') + if highcut and lowcut: + assert lowcut < highcut, f"Lowcut: {lowcut} above highcut: {highcut}" + + # Allow datetimes for starttime and endtime + if starttime and not isinstance(starttime, UTCDateTime): + starttime = UTCDateTime(starttime) + if starttime is False: + starttime = None + if endtime and not isinstance(endtime, UTCDateTime): + endtime = UTCDateTime(endtime) + if endtime is False: + endtime = None + + # Make sensible choices about workers and chunk sizes if parallel: if not num_cores: - num_cores = cpu_count() - if num_cores > len(st): - num_cores = len(st) - pool = Pool(processes=num_cores) - results = [pool.apply_async(process, (tr,), { - 'lowcut': lowcut, 'highcut': highcut, 'filt_order': filt_order, - 'samp_rate': samp_rate, 'starttime': starttime, - 'clip': clip, 'seisan_chan_names': seisan_chan_names, - 'fill_gaps': fill_gaps, 'length': length, - 'ignore_length': ignore_length, 'fft_threads': fft_threads, - 'ignore_bad_data': ignore_bad_data}) - for tr in st] - pool.close() - try: - stream_list = [p.get() for p in results] - except KeyboardInterrupt as e: # pragma: no cover - pool.terminate() - raise e - pool.join() - st = Stream(stream_list) + # We don't want to over-specify threads, we don't have IO + # bound tasks + max_workers = min(len(st), os.cpu_count()) + else: + max_workers = min(len(st), num_cores) else: - for i, tr in enumerate(st): - st[i] = process( - tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order, - samp_rate=samp_rate, starttime=starttime, - clip=clip, seisan_chan_names=seisan_chan_names, - fill_gaps=fill_gaps, length=length, - ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, - fft_threads=fft_threads) - if tracein: - st.merge() - return st[0] - return st + max_workers = 1 + chunksize = len(st) // max_workers + st, length, clip, starttime = _sanitize_length( + st=st, starttime=starttime, endtime=endtime, daylong=daylong) -def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, - parallel=True, num_cores=False, ignore_length=False, - seisan_chan_names=False, fill_gaps=True, ignore_bad_data=False, - fft_threads=1): - """ - Wrapper for dayproc to parallel multiple traces in a stream. - - Works in place on data. This is employed to ensure all parts of the data \ - are processed in the same way. - - :type st: obspy.core.stream.Stream - :param st: Stream to process (can be trace). - :type lowcut: float - :param lowcut: Low cut in Hz for bandpass. - :type highcut: float - :param highcut: High cut in Hz for bandpass. - :type filt_order: int - :param filt_order: Corners for bandpass. - :type samp_rate: float - :param samp_rate: Desired sampling rate in Hz. 
- :type starttime: obspy.core.utcdatetime.UTCDateTime - :param starttime: Desired start-date of trace. - :type parallel: bool - :param parallel: - Set to True to process traces in parallel, this is often faster than - serial processing of traces: defaults to True. - :type num_cores: int - :param num_cores: - Control the number of cores for parallel processing, if set to False - then this will use all the cores. - :type ignore_length: bool - :param ignore_length: See warning below. - :type seisan_chan_names: bool - :param seisan_chan_names: - Whether channels are named like seisan channels (which are two letters - rather than SEED convention of three) - defaults to True. - :type fill_gaps: bool - :param fill_gaps: Whether to pad any gaps found with zeros or not. - :type ignore_bad_data: bool - :param ignore_bad_data: - If False (default), errors will be raised if data are excessively - gappy or are mostly zeros. If True then no error will be raised, but - an empty trace will be returned. - :type fft_threads: int - :param fft_threads: - Number of threads to use for pyFFTW FFT in resampling. Note that it - is not recommended to use fft_threads > 1 and num_cores > 1. + for tr in st: + if len(tr.data) == 0: + st.remove(tr) + Logger.warning('No data for {0} after trim'.format(tr.id)) - :return: Processed stream. - :rtype: :class:`obspy.core.stream.Stream` + # Do work + # 0. Enforce double-preccision floats for this work + for tr in st: + if not tr.data.dtype == np.float64: + Logger.debug(f"Converting {tr.id} to double precision") + tr.data = tr.data.astype(np.float64) + # 1. Fill gaps and keep track of them + gappy = {tr.id: False for tr in st} + gaps = dict() + for i, tr in enumerate(st): + if isinstance(tr.data, np.ma.MaskedArray): + gappy[tr.id] = True + gaps[tr.id], tr = _fill_gaps(tr) + st[i] = tr + + # 2. Check for zeros and cope with bad data + # ~ 4x speedup for 50 100 Hz daylong traces on 12 threads + qual = _simple_qc(st, max_workers=max_workers, chunksize=chunksize) + for trace_id, _qual in qual.items(): + if not _qual: + msg = ("Data have more zeros than actual data, please check the " + f"raw data set-up and manually sort it: {tr.id}") + if not ignore_bad_data: + raise ValueError(msg) + else: + # Remove bad traces from the stream + try: + st.remove(st.select(id=trace_id)) + except ValueError: + Logger.info( + f"{trace_id} not found in {set(tr.id for tr in st)}," + f" ignoring") + + # 3. Detrend + # ~ 2x speedup for 50 100 Hz daylong traces on 12 threads + st = _multi_detrend(st, max_workers=max_workers, chunksize=chunksize) + + # 4. Check length and pad to length + padded = {tr.id: (0., 0.) for tr in st} + if clip: + st.trim(starttime, starttime + length, nearest_sample=True) + # Indexing because we are going to overwrite traces + for i, _ in enumerate(st): + if float(st[i].stats.npts / st[i].stats.sampling_rate) != length: + Logger.info( + 'Data for {0} are not long-enough, will zero pad'.format( + st[i].id)) + st[i], padded[st[i].id] = _length_check( + st[i], starttime=starttime, length=length, + ignore_length=ignore_length, + ignore_bad_data=ignore_bad_data) + # Remove None traces that might be returned from length checking + st.traces = [tr for tr in st if tr is not None] + + # Check that we actually still have some data + if not _stream_has_data(st): + if tracein: + return st[0] + return st + + # 5. 
Resample + # ~ 3.25x speedup for 50 100 Hz daylong traces on 12 threads + st = _multi_resample( + st, sampling_rate=samp_rate, max_workers=max_workers, + chunksize=chunksize) + + # Detrend again before filtering + st = _multi_detrend(st, max_workers=max_workers, chunksize=chunksize) + + # 6. Filter + # ~3.25x speedup for 50 100 Hz daylong traces on 12 threads + st = _multi_filter( + st, highcut=highcut, lowcut=lowcut, filt_order=filt_order, + max_workers=max_workers, chunksize=chunksize) + + # 7. Reapply zeros after processing from 4 + for tr in st: + # Pads default to (0., 0.), pads should only ever be positive. + if sum(padded[tr.id]) == 0: + continue + Logger.debug("Reapplying zero pads post processing") + Logger.debug(str(tr)) + pre_pad = np.zeros(int(padded[tr.id][0] * tr.stats.sampling_rate)) + post_pad = np.zeros(int(padded[tr.id][1] * tr.stats.sampling_rate)) + pre_pad_len = len(pre_pad) + post_pad_len = len(post_pad) + Logger.debug( + f"Taking only valid data between {pre_pad_len} and " + f"{tr.stats.npts - post_pad_len} samples") + # Re-apply the pads, taking only the data section that was valid + tr.data = np.concatenate( + [pre_pad, tr.data[pre_pad_len: len(tr.data) - post_pad_len], + post_pad]) + Logger.debug(str(tr)) - .. note:: - If your data contain gaps you should *NOT* fill those gaps before - using the pre-process functions. The pre-process functions will fill - the gaps internally prior to processing, process the data, then re-fill - the gaps with zeros to ensure correlations are not incorrectly - calculated within gaps. If your data have gaps you should pass a merged - stream without the `fill_value` argument (e.g.: `st = st.merge()`). - - .. warning:: - Will fail if data are less than 19.2 hours long - this number is - arbitrary and is chosen to alert the user to the dangers of padding - to day-long, if you don't care you can ignore this error by setting - `ignore_length=True`. Use this option at your own risk! It will also - warn any-time it has to pad data - if you see strange artifacts in your - detections, check whether the data have gaps. + # 8. Recheck length + for tr in st: + if float(tr.stats.npts * tr.stats.delta) != length and clip: + Logger.info(f'Data for {tr.id} are not of required length, will ' + f'zero pad') + # Use obspy's trim function with zero padding + tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0, + nearest_sample=True) + # If there is one sample too many after this remove the last one + # by convention + if len(tr.data) == (length * tr.stats.sampling_rate) + 1: + tr.data = tr.data[1:len(tr.data)] + if abs((tr.stats.sampling_rate * length) - + tr.stats.npts) > tr.stats.delta: + raise ValueError('Data are not required length for ' + + tr.stats.station + '.' + tr.stats.channel) - .. rubric:: Example + # 9. Re-insert gaps from 1 + for i, tr in enumerate(st): + if gappy[tr.id]: + st[i] = _zero_pad_gaps(tr, gaps[tr.id], fill_gaps=fill_gaps) - >>> import obspy - >>> if int(obspy.__version__.split('.')[0]) >= 1: - ... from obspy.clients.fdsn import Client - ... else: - ... 
from obspy.fdsn import Client - >>> from obspy import UTCDateTime - >>> from eqcorrscan.utils.pre_processing import dayproc - >>> client = Client('NCEDC') - >>> t1 = UTCDateTime(2012, 3, 26) - >>> t2 = t1 + 86400 - >>> bulk_info = [('BP', 'JCNB', '40', 'SP1', t1, t2)] - >>> st = client.get_waveforms_bulk(bulk_info) - >>> st_keep = st.copy() # Copy the stream for later examples - >>> # Example of bandpass filtering - >>> st = dayproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20, - ... starttime=t1, parallel=True, num_cores=2) - >>> print(st[0]) - BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\ -950000Z | 20.0 Hz, 1728000 samples - >>> # Example of lowpass filtering - >>> st = dayproc(st=st, lowcut=None, highcut=9, filt_order=3, samp_rate=20, - ... starttime=t1, parallel=True, num_cores=2) - >>> print(st[0]) - BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\ -950000Z | 20.0 Hz, 1728000 samples - >>> # Example of highpass filtering - >>> st = dayproc(st=st, lowcut=2, highcut=None, filt_order=3, samp_rate=20, - ... starttime=t1, parallel=True, num_cores=2) - >>> print(st[0]) - BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\ -950000Z | 20.0 Hz, 1728000 samples - """ - # Add sanity check for filter - if isinstance(st, Trace): - st = Stream(st) - tracein = True - else: - tracein = False - if highcut and highcut >= 0.5 * samp_rate: - raise IOError('Highcut must be lower than the nyquist') - # Set the start-time to a day start - cope with - if starttime is None: - startdates = [] - for tr in st: - if abs(tr.stats.starttime - (UTCDateTime( - tr.stats.starttime.date) + 86400)) < tr.stats.delta: - # If the trace starts within 1 sample of the next day, use the - # next day as the startdate - startdates.append((tr.stats.starttime + 86400).date) - Logger.warning( - '{0} starts within 1 sample of the next day, using this ' - 'time {1}'.format( - tr.id, (tr.stats.starttime + 86400).date)) - else: - startdates.append(tr.stats.starttime.date) - # Check that all traces start on the same date... - if not len(set(startdates)) == 1: - raise NotImplementedError('Traces start on different days') - starttime = UTCDateTime(startdates[0]) - if parallel: - if not num_cores: - num_cores = cpu_count() - if num_cores > len(st): - num_cores = len(st) - pool = Pool(processes=num_cores) - results = [pool.apply_async(process, (tr,), { - 'lowcut': lowcut, 'highcut': highcut, 'filt_order': filt_order, - 'samp_rate': samp_rate, 'starttime': starttime, 'clip': True, - 'ignore_length': ignore_length, 'length': 86400, - 'seisan_chan_names': seisan_chan_names, 'fill_gaps': fill_gaps, - 'ignore_bad_data': ignore_bad_data, 'fft_threads': fft_threads}) - for tr in st] - pool.close() - try: - stream_list = [p.get() for p in results] - except KeyboardInterrupt as e: # pragma: no cover - pool.terminate() - raise e - pool.join() - st = Stream(stream_list) - else: - for i, tr in enumerate(st): - st[i] = process( - tr=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order, - samp_rate=samp_rate, starttime=starttime, clip=True, - length=86400, ignore_length=ignore_length, - seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, - ignore_bad_data=ignore_bad_data, fft_threads=fft_threads) + # 10. Clean up for tr in st: if len(tr.data) == 0: st.remove(tr) + + # 11. 
Account for seisan channel naming + if seisan_chan_names: + for tr in st: + tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1] + if tracein: st.merge() return st[0] + return st -def process(tr, lowcut, highcut, filt_order, samp_rate, - starttime=False, clip=False, length=86400, - seisan_chan_names=False, ignore_length=False, fill_gaps=True, - ignore_bad_data=False, fft_threads=1): +@lru_cache(maxsize=50) +def _empty_trace( + network, + station, + location, + channel, + starttime, + sampling_rate +): + """ + Generate an empty trace with a basic header matching the input trace + + :param network: Network code + :type network: str + :param station: Station code + :type station: str + :param location: Location code + :type location: str + :param channel: Channel code + :type channel: str + :param starttime: Start time of trace as datetime (NOT UTCDateTime) + :type starttime: datetime.datetime + :param sampling_rate: Sampling rate of data + :type sampling_rate: float + + :returns: trace """ - Basic function to process data, usually called by dayproc or shortproc. + bad_trace = Trace( + data=np.array([]), header={ + "station": station, "channel": channel, + "network": network, "location": location, + "starttime": starttime, + "sampling_rate": sampling_rate}) + return bad_trace - Functionally, this will bandpass, downsample and check headers and length - of trace to ensure files start when they should and are the correct length. - This is a simple wrapper on obspy functions, we include it here to provide - a system to ensure all parts of the dataset are processed in the same way. - .. note:: Usually this function is called via dayproc or shortproc. +def _stream_has_data(st): + return sum(tr.stats.npts for tr in st) > 0 - :type tr: obspy.core.trace.Trace - :param tr: Trace to process - :type lowcut: float - :param lowcut: - Low cut in Hz, if set to None and highcut is set, will use - a lowpass filter. - :type highcut: float - :param highcut: - High cut in Hz, if set to None and lowcut is set, will use - a highpass filter. - :type filt_order: int - :param filt_order: Number of corners for filter. - :type samp_rate: float - :param samp_rate: Desired sampling rate in Hz. - :type starttime: obspy.core.utcdatetime.UTCDateTime - :param starttime: Desired start of trace - :type clip: bool - :param clip: Whether to expect, and enforce a set length of data or not. + +def _length_check(tr, starttime, length, ignore_length, ignore_bad_data): + """ + Check that a trace meets the length requirements specified. + + Data are padded if needed to meet the length requirement. + + :param tr: Trace to check + :type tr: obspy.core.Trace + :param starttime: Desired starttime of data + :type starttime: obspy.core.UTCDateTime + :param length: Length in seconds required for data :type length: float - :param length: Use to set a fixed length for data from the given starttime. - :type seisan_chan_names: bool - :param seisan_chan_names: - Whether channels are named like seisan channels (which are two letters - rather than SEED convention of three) - defaults to True. + :param ignore_length: + Whether to ignore data that do not meet length criteria :type ignore_length: bool - :param ignore_length: See warning in dayproc. - :type fill_gaps: bool - :param fill_gaps: Whether to pad any gaps found with zeros or not. - :type ignore_bad_data: bool :param ignore_bad_data: - If False (default), errors will be raised if data are excessively - gappy or are mostly zeros. 
If True then no error will be raised, but - an empty trace will be returned. - :type fft_threads: int - :param fft_threads: Number of threads to use for pyFFTW FFT in resampling - - :return: Processed trace. - :type: :class:`obspy.core.stream.Trace` + Whether to ignore data that do not meet gappiness criteria + :type ignore_bad_data: bool - .. note:: - If your data contain gaps you should *NOT* fill those gaps before - using the pre-process functions. The pre-process functions will fill - the gaps internally prior to processing, process the data, then re-fill - the gaps with zeros to ensure correlations are not incorrectly - calculated within gaps. If your data have gaps you should pass a merged - stream without the `fill_value` argument (e.g.: `tr = tr.merge()`). + :return: obspy.core.Trace that meets criteria """ - # Add sanity check - if highcut and highcut >= 0.5 * samp_rate: - raise IOError('Highcut must be lower than the nyquist') - - # Define the start-time - if starttime: - # Be nice and allow a datetime object. - if isinstance(starttime, dt.date) or isinstance(starttime, - dt.datetime): - starttime = UTCDateTime(starttime) - - Logger.debug('Working on: {0}'.format(tr.id)) - # Check if the trace is gappy and pad if it is. - gappy = False - if isinstance(tr.data, np.ma.MaskedArray): - gappy = True - gaps, tr = _fill_gaps(tr) - # Do a brute force quality check - qual = _check_daylong(tr) - if not qual: - msg = ("Data have more zeros than actual data, please check the raw", - " data set-up and manually sort it: " + tr.stats.station + "." + - tr.stats.channel) + trace_length = tr.stats.endtime - tr.stats.starttime + if trace_length < 0.8 * length and not ignore_length: + msg = f"Data for {tr.id} is {trace_length:.2f} seconds "\ + f"long, which is less than 80 percent of the desired "\ + f"length ({length} seconds), will not pad" if not ignore_bad_data: - raise ValueError(msg) + raise NotImplementedError(msg) else: Logger.warning(msg) - return Trace(data=np.array([]), header={ - "station": tr.stats.station, "channel": tr.stats.channel, - "network": tr.stats.network, "location": tr.stats.location, - "starttime": tr.stats.starttime, - "sampling_rate": tr.stats.sampling_rate}) - tr = tr.detrend('simple') - # Detrend data before filtering - Logger.debug('I have {0} data points for {1} before processing'.format( - tr.stats.npts, tr.id)) - - # Sanity check to ensure files are daylong - padded = False - if clip: - tr = tr.trim(starttime, starttime + length, nearest_sample=True) - if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip: + return _empty_trace(tr.stats.network, tr.stats.station, + tr.stats.location, tr.stats.channel, + tr.stats.starttime.datetime, + tr.stats.sampling_rate), (0., 0.) 
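The 80 per cent rule enforced in the branch above can be made concrete with a worked number (values are illustrative only, not part of the patch):

# Worked example of the length check above; the numbers are made up.
length = 86400.0                        # requested window length in seconds
trace_length = 70000.0                  # actual span of the trace
accept = trace_length >= 0.8 * length   # 70000 >= 69120 -> True: zero-pad to length
# A shorter trace raises NotImplementedError unless ignore_length=True,
# or is replaced by an empty trace when ignore_bad_data=True.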
+ # trim, then calculate length of any pads required + pre_pad_secs = tr.stats.starttime - starttime + post_pad_secs = (starttime + length) - tr.stats.endtime + if pre_pad_secs > 0 or post_pad_secs > 0: + pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate)) + post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate)) + Logger.debug(str(tr)) Logger.info( - 'Data for {0} are not long-enough, will zero pad'.format( - tr.id)) - if tr.stats.endtime - tr.stats.starttime < 0.8 * length\ - and not ignore_length: - msg = ( - "Data for {0}.{1} is {2:.2f} seconds long, which is less than " - "80 percent of the desired length ({3} seconds), will not " - "pad".format( - tr.stats.station, tr.stats.channel, - tr.stats.endtime - tr.stats.starttime, length)) - if not ignore_bad_data: - raise NotImplementedError(msg) - else: - Logger.warning(msg) - return Trace(data=np.array([]), header={ - "station": tr.stats.station, "channel": tr.stats.channel, - "network": tr.stats.network, "location": tr.stats.location, - "starttime": tr.stats.starttime, - "sampling_rate": tr.stats.sampling_rate}) - # trim, then calculate length of any pads required - pre_pad_secs = tr.stats.starttime - starttime - post_pad_secs = (starttime + length) - tr.stats.endtime - if pre_pad_secs > 0 or post_pad_secs > 0: - padded = True - pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate)) - post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate)) - Logger.debug(str(tr)) - Logger.info("Padding to length with {0} s before and {1} s " - "at end".format(pre_pad_secs, post_pad_secs)) - tr.data = np.concatenate([pre_pad, tr.data, post_pad]) - # Use this rather than the expected pad because of rounding samples - tr.stats.starttime -= len(pre_pad) * tr.stats.delta - Logger.debug(str(tr)) - # If there is one sample too many after this remove the first one - # by convention - if tr.stats.npts == (length * tr.stats.sampling_rate) + 1: - tr.data = tr.data[1:len(tr.data)] - # Cope with time precision. - if abs((tr.stats.sampling_rate * length) - - tr.stats.npts) > tr.stats.delta: - msg = ("Data sampling-rate * length ({0} * {1} = {2}) does not " - "match number of samples ({3}) for {4}".format( - tr.stats.sampling_rate, length, - tr.stats.sampling_rate * length, tr.stats.npts, tr.id)) - if not ignore_bad_data: - raise ValueError(msg) - else: - Logger.warning(msg) - return Trace(data=np.array([]), header={ - "station": tr.stats.station, "channel": tr.stats.channel, - "network": tr.stats.network, "location": tr.stats.location, - "starttime": tr.stats.starttime, - "sampling_rate": tr.stats.sampling_rate}) - Logger.debug( - 'I now have {0} data points after enforcing length'.format( - tr.stats.npts)) - # Check sampling rate and resample - if tr.stats.sampling_rate != samp_rate: - Logger.debug('Resampling') - tr = _resample(tr, samp_rate, threads=fft_threads) - # Filtering section - tr = tr.detrend('simple') # Detrend data again before filtering + f"Padding to length with {pre_pad_secs} s before " + f"and {post_pad_secs} s at end") + tr.data = np.concatenate([pre_pad, tr.data, post_pad]) + # Use this rather than the expected pad because of rounding samples + tr.stats.starttime -= len(pre_pad) * tr.stats.delta + Logger.debug(str(tr)) + # If there is one sample too many after this remove the first one + # by convention + if tr.stats.npts == (length * tr.stats.sampling_rate) + 1: + tr.data = tr.data[1:len(tr.data)] + # Cope with time precision. 
+ if abs((tr.stats.sampling_rate * length) - + tr.stats.npts) > tr.stats.delta: + msg = (f"Data sampling-rate * length ({tr.stats.sampling_rate} *" + f" {length} = {tr.stats.sampling_rate * length}) does not " + f"match number of samples ({tr.stats.npts}) for {tr.id}") + if not ignore_bad_data: + raise ValueError(msg) + else: + Logger.warning(msg) + return _empty_trace(tr.stats.network, tr.stats.station, + tr.stats.location, tr.stats.channel, + tr.stats.starttime.datetime, + tr.stats.sampling_rate), (0., 0.) + Logger.debug( + f'I now have {tr.stats.npts} data points after enforcing length') + return tr, (pre_pad_secs, post_pad_secs) + + +def _multi_filter(st, highcut, lowcut, filt_order, max_workers=None, + chunksize=1): + """ + Multithreaded zero-phase butterworth filtering of multi-channel data. + + :param st: Stream to filter + :type st: obspy.core.Stream + :param highcut: Highcut for butterworth filter in Hz + :type highcut: float + :param lowcut: Lowcut for butterworth filter in Hz + :type lowcut: float + :param filt_order: Filter order + :type filt_order: int + :param max_workers: Maximum number of threads to use + :type max_workers: int + :param chunksize: Number of traces to process per thread + :type chunksize: int + + :return: obspy.core.Stream of filtered data + """ + if not highcut and not lowcut: + Logger.warning("No filters applied") + return st + # Require that all channels are the same sampling frequency + samp_rate = set(tr.stats.sampling_rate for tr in st) + assert len(samp_rate) == 1, "Different sampling rates found" + samp_rate = samp_rate.pop() + # Sanity check filter bounds + if highcut: + assert highcut * 2 < samp_rate, "Highcut must be below Nyquist" + if highcut and lowcut: + assert lowcut < highcut, "Lowcut must be below highcut" + + fe = 0.5 * samp_rate + if lowcut: + low = lowcut / fe + if highcut: + high = highcut / fe + + # Design filter if highcut and lowcut: - Logger.debug('Bandpassing') - tr.data = bandpass(tr.data, lowcut, highcut, - tr.stats.sampling_rate, filt_order, True) + z, p, k = iirfilter( + filt_order, [low, high], btype='band', + ftype='butter', output='zpk') elif highcut: - Logger.debug('Lowpassing') - tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate, - filt_order, True) + z, p, k = iirfilter( + filt_order, high, btype='lowpass', ftype='butter', + output='zpk') elif lowcut: - Logger.debug('Highpassing') - tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate, - filt_order, True) - else: - Logger.warning('No filters applied') - # Account for two letter channel names in s-files and therefore templates - if seisan_chan_names: - tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1] + z, p, k = iirfilter( + filt_order, low, btype='highpass', ftype='butter', + output='zpk') - if padded: - Logger.debug("Reapplying zero pads post processing") - Logger.debug(str(tr)) - pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate)) - post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate)) - pre_pad_len = len(pre_pad) - post_pad_len = len(post_pad) - Logger.debug( - "Taking only valid data between {0} and {1} samples".format( - pre_pad_len, tr.stats.npts - post_pad_len)) - # Re-apply the pads, taking only the data section that was valid - tr.data = np.concatenate( - [pre_pad, tr.data[pre_pad_len: len(tr.data) - post_pad_len], - post_pad]) - Logger.debug(str(tr)) - # Sanity check to ensure files are correct length - if float(tr.stats.npts * tr.stats.delta) != length and clip: - Logger.info( - 'Data for {0} are not of required 
length, will zero pad'.format( - tr.id)) - # Use obspy's trim function with zero padding - tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0, - nearest_sample=True) - # If there is one sample too many after this remove the last one - # by convention - if len(tr.data) == (length * tr.stats.sampling_rate) + 1: - tr.data = tr.data[1:len(tr.data)] - if abs((tr.stats.sampling_rate * length) - - tr.stats.npts) > tr.stats.delta: - raise ValueError('Data are not required length for ' + - tr.stats.station + '.' + tr.stats.channel) - # Replace the gaps with zeros - if gappy: - tr = _zero_pad_gaps(tr, gaps, fill_gaps=fill_gaps) - return tr + sos = zpk2sos(z, p, k) + + _filter = partial(_zerophase_filter, sos) + with ThreadPoolExecutor(max_workers) as executor: + results = executor.map( + _filter, (tr.data for tr in st), chunksize=chunksize) -def _resample(tr, sampling_rate, threads=1): + for r, tr in zip(results, st): + tr.data = r + + return st + + +def _zerophase_filter(sos, data): """ - Provide a pyfftw version of obspy's trace resampling. This code is - modified from obspy's Trace.resample method. + Simple zerophase implementation of sosfilt. + + :param sos: Second-order-series of filters + :param data: Data to filter + :return: filtered data """ - from scipy.signal import get_window - from pyfftw.interfaces.scipy_fftpack import rfft, irfft - - factor = tr.stats.sampling_rate / float(sampling_rate) - # resample in the frequency domain. Make sure the byteorder is native. - x = rfft(tr.data.newbyteorder("="), threads=threads) - # Cast the value to be inserted to the same dtype as the array to avoid - # issues with numpy rule 'safe'. - x = np.insert(x, 1, x.dtype.type(0)) - if tr.stats.npts % 2 == 0: - x = np.append(x, [0]) - x_r = x[::2] - x_i = x[1::2] - - large_w = np.fft.ifftshift( - get_window("hanning", tr.stats.npts)) - x_r *= large_w[:tr.stats.npts // 2 + 1] - x_i *= large_w[:tr.stats.npts // 2 + 1] + if len(data) == 0: + Logger.debug("No data, no filtering") + return data + firstpass = sosfilt(sos, data) + return sosfilt(sos, firstpass[::-1])[::-1] + + +def _multi_detrend(st, max_workers=None, chunksize=1): + """ + Multithreaded detrending using simple linear detrend between + first and last values. Follows obspy "simple" detrend. + + :param st: Stream to detrend + :type st: obspy.core.Stream + :param max_workers: Maximum number of threads to use + :type max_workers: int + :param chunksize: Number of traces to process per thread + :type chunksize: int + + :return: obspy.core.Stream of detrended data + """ + for tr in st: + tr.data = np.require(tr.data, np.float64) + with ThreadPoolExecutor(max_workers) as executor: + results = executor.map(_detrend, (tr.data for tr in st), + chunksize=chunksize) + # Ensure tasks complete + _ = (r for r in results) + return st + + +def _detrend(data): + """ + Detrend signal simply by subtracting a line through the first and last + point of the trace + + :param data: Data to detrend + :type data: np.ndarray. + :return: Nothing - works in place + """ + # Work in double-precision. 
+ data = np.require(data, dtype=np.float64) + ndat = data.shape[0] + x1, x2 = data[0], data[-1] + data -= x1 + np.arange(ndat, dtype=np.float64) * ( + np.float64(x2 - x1) / np.float64(ndat - 1)) + return + + +def _multi_resample(st, sampling_rate, max_workers=None, chunksize=1): + """ + Threaded resampling of a stream of data to a consistent sampling-rate + + :param st: Stream to resample + :type st: obspy.core.Stream + :param sampling_rate: Sampling rate to resample to + :type sampling_rate: float + :param max_workers: Maximum number of threads to use + :type max_workers: int + :param chunksize: Number of traces to process per thread + :type chunksize: int + + :return: obspy.core.Stream of resampled data + """ + # Get the windows, and downsampling factors ahead of time + to_resample = ( + (tr.data, tr.stats.delta, + tr.stats.sampling_rate / float(sampling_rate), + sampling_rate, _get_window("hann", tr.stats.npts), tr.id) + for tr in st) + with ThreadPoolExecutor(max_workers) as executor: + # Unpack tuple using lambda + results = executor.map(lambda args: _resample(*args), to_resample, + chunksize=chunksize) + for r, tr in zip(results, st): + tr.data = r + tr.stats.sampling_rate = sampling_rate + return st + + +def _resample(data, delta, factor, sampling_rate, large_w, _id): + """ + Resample data in the frequency domain - adapted from obspy resample method + + :param data: Data to resample + :type data: np.ndarray + :param delta: Sample interval in seconds + :type delta: float + :param factor: Factor to resample by + :type factor: float + :param sampling_rate: Desired sampling-rate + :type sampling_rate: float + :param large_w: Window to apply to spectra to stabilise resampling + :type large_w: np.ndarray + + :return: np.ndarray of resampled data. + """ + if factor == 1: + # No resampling needed, don't waste time. + return data + # Need to work with numpy objects to release the GIL + npts = data.shape[0] + Logger.debug(f"Running resample for {_id} with {npts} data points") + Logger.debug(f"{_id}: delta={delta}, factor={factor}, " + f"sampling_rate out={sampling_rate}") + Logger.debug(f"Sanity check data for {_id}, start and " + f"end: {data[0]} -- {data[-1]}") + Logger.debug(f"dtype for {_id}: {data.dtype}") + if data.dtype == np.dtype('float64'): + _floater = np.float64 # Retain double-precision + else: + _floater = np.float32 + # Use single-precision where possible to reduce memory + data = data.astype(_floater) + df = _floater(1.0) / (npts * delta) + num = np.int32(npts / factor) + d_large_f = _floater(1.0) / num * sampling_rate + + # Forward fft + x = np.fft.rfft(data) + # Window + x *= large_w[:npts // 2 + 1] # interpolate - num = int(tr.stats.npts / factor) - df = 1.0 / (tr.stats.npts * tr.stats.delta) - d_large_f = 1.0 / num * sampling_rate - f = df * np.arange(0, tr.stats.npts // 2 + 1, dtype=np.int32) + f = df * np.arange(0, npts // 2 + 1, dtype=np.int32) n_large_f = num // 2 + 1 large_f = d_large_f * np.arange(0, n_large_f, dtype=np.int32) - large_y = np.zeros((2 * n_large_f)) - large_y[::2] = np.interp(large_f, f, x_r) - large_y[1::2] = np.interp(large_f, f, x_i) - large_y = np.delete(large_y, 1) - if num % 2 == 0: - large_y = np.delete(large_y, -1) - tr.data = irfft(large_y, threads=threads) * ( - float(num) / float(tr.stats.npts)) - tr.stats.sampling_rate = sampling_rate + # Have to split into real and imaginary parts for interpolation. 
+ y = np.interp(large_f, f, np.real(x)) + (1j * np.interp( + large_f, f, np.imag(x))) + # Try to reduce memory before doing the ifft + del large_f, f, x - return tr + return np.fft.irfft(y, n=num)[0:num] * (_floater(num) / _floater(npts)) def _zero_pad_gaps(tr, gaps, fill_gaps=True): @@ -682,19 +728,18 @@ def _zero_pad_gaps(tr, gaps, fill_gaps=True): :param tr: A trace that has had the gaps padded :param gaps: List of dict of start-time and end-time as UTCDateTime objects :type gaps: list + :param fill_gaps: Whether to fill gaps with zeros, or leave them as gaps + :type fill_gaps: bool :return: :class:`obspy.core.stream.Trace` """ start_in, end_in = (tr.stats.starttime, tr.stats.endtime) + tr = Stream([tr]) # convert to stream to use cutout method for gap in gaps: - stream = Stream() - if gap['starttime'] > tr.stats.starttime: - stream += tr.slice(tr.stats.starttime, gap['starttime']).copy() - if gap['endtime'] < tr.stats.endtime: - # Note this can happen when gaps are calculated for a trace that - # is longer than `length`, e.g. gaps are calculated pre-trim. - stream += tr.slice(gap['endtime'], tr.stats.endtime).copy() - tr = stream.merge()[0] + Logger.debug( + f"Filling gap between {gap['starttime']} and {gap['endtime']}") + tr.cutout(gap['starttime'], gap['endtime']).merge() + tr = tr.merge()[0] if fill_gaps: tr = tr.split() tr = tr.detrend() @@ -725,9 +770,248 @@ def _fill_gaps(tr): gaps = tr.get_gaps() tr = tr.detrend().merge(fill_value=0)[0] gaps = [{'starttime': gap[4], 'endtime': gap[5]} for gap in gaps] + if len(gaps): + Logger.debug(f"Gaps in {tr.id}: \n\t{gaps}") return gaps, tr +def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, + parallel, cores, stream, daylong, + ignore_length, ignore_bad_data, overlap): + """ + Process and chunk data. + + :type parallel: bool + :param parallel: Whether to use parallel processing or not + :type cores: int + :param cores: Number of cores to use, can be False to use all available. + :type stream: :class:`obspy.core.stream.Stream` + :param stream: Stream to process, will be left intact. + :type daylong: bool + :param daylong: Whether to enforce day-length files or not. + :type ignore_length: bool + :param ignore_length: + If using daylong=True, then processing will try check that the data + are there for at least 80% of the day, if you don't want this check + (which will raise an error if too much data are missing) then set + ignore_length=True. This is not recommended! + :type ignore_bad_data: bool + :param ignore_bad_data: + If False (default), errors will be raised if data are excessively + gappy or are mostly zeros. If True then no error will be raised, but + an empty trace will be returned. + :type overlap: float + :param overlap: Number of seconds to overlap chunks by. + + :return: list of processed streams. + """ + processed_streams = [] + kwargs = { + 'filt_order': filt_order, + 'highcut': highcut, 'lowcut': lowcut, + 'samp_rate': samp_rate, 'parallel': parallel, + 'num_cores': cores, 'ignore_length': ignore_length, + 'ignore_bad_data': ignore_bad_data} + # Processing always needs to be run to account for gaps - pre-process will + # check whether filtering and resampling needs to be done. 
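The gap handling referred to in the comment above is a round trip: _fill_gaps records the gap windows and zero-fills them before processing, and _zero_pad_gaps re-zeros those windows afterwards so that filter and resampling ringing cannot create spurious correlations inside the gaps. A minimal sketch follows (it uses the module's private helpers purely for illustration; the gappy trace is synthetic and not part of the patch):

# Synthetic gappy trace: 10 s at 100 Hz with a 1 s masked gap.
import numpy as np
from obspy import Trace
from eqcorrscan.utils.pre_processing import _fill_gaps, _zero_pad_gaps

data = np.ma.masked_array(np.random.randn(1000), mask=np.zeros(1000, dtype=bool))
data.mask[400:500] = True
tr = Trace(data, header={"sampling_rate": 100.0})

gaps, tr = _fill_gaps(tr)        # masked gap -> zeros, window recorded
# ... detrend / resample / filter the gap-filled trace ...
tr = _zero_pad_gaps(tr, gaps)    # re-zero the recorded window post-processing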
+ + starttimes = sorted([tr.stats.starttime for tr in stream]) + endtimes = sorted([tr.stats.endtime for tr in stream]) + + if daylong: + if process_length != 86400: + Logger.warning( + f'Processing day-long data, but template was cut from ' + f'{process_length} s long data, will reduce correlations') + process_length = 86400 + # Check that data all start on the same day, otherwise strange + # things will happen... + startdates = [starttime.date for starttime in starttimes] + if not len(set(startdates)) == 1: + Logger.warning('Data start on different days, setting to last day') + starttime = UTCDateTime(startdates[-1]) + else: + starttime = UTCDateTime(startdates[0]) # Can take any + else: + # We want to use shortproc to allow overlaps + starttime = starttimes[0] + endtime = endtimes[-1] + data_len_samps = round((endtime - starttime) * samp_rate) + 1 + assert overlap < process_length, "Overlap must be less than process length" + chunk_len_samps = (process_length - overlap) * samp_rate + n_chunks = int(data_len_samps // chunk_len_samps) + Logger.info(f"Splitting these data in {n_chunks} chunks") + if n_chunks == 0: + Logger.error('Data must be process_length or longer, not computing') + return [] + + for i in range(n_chunks): + kwargs.update( + {'starttime': starttime + (i * (process_length - overlap))}) + if not daylong: + _endtime = kwargs['starttime'] + process_length + kwargs.update({'endtime': _endtime}) + else: + _endtime = kwargs['starttime'] + 86400 + + # This is where data should be copied and only here! + if n_chunks > 1: + chunk_stream = _quick_copy_stream( + stream.slice(starttime=kwargs['starttime'], endtime=_endtime)) + # Reduce memory by removing data that we don't need anymore + stream.trim(starttime=_endtime - overlap) + else: + # If we only have one chunk, lets just use those data! + chunk_stream = stream.trim( + starttime=kwargs['starttime'], endtime=_endtime) + Logger.info(f"Processing chunk {i} between {kwargs['starttime']} " + f"and {_endtime}") + if len(chunk_stream) == 0: + Logger.warning( + f"No data between {kwargs['starttime']} and {_endtime}") + continue + # Enforce chunk npts + for tr in chunk_stream: + Logger.info( + f"Enforcing {int(process_length * tr.stats.sampling_rate)} " + f"samples for {tr.id} (had {tr.stats.npts} points)") + tr.data = tr.data[0:int( + process_length * tr.stats.sampling_rate)] + _chunk_stream_lengths = { + tr.id: tr.stats.endtime - tr.stats.starttime + for tr in chunk_stream} + for tr_id, chunk_length in _chunk_stream_lengths.items(): + # Remove traces that are too short. 
+ if not ignore_length and chunk_length <= .8 * process_length: + tr = chunk_stream.select(id=tr_id)[0] + chunk_stream.remove(tr) + Logger.warning( + "Data chunk on {0} starting {1} and ending {2} is " + "below 80% of the requested length, will not use" + " this.".format( + tr.id, tr.stats.starttime, tr.stats.endtime)) + if len(chunk_stream) == 0: + continue + Logger.debug( + f"Processing chunk:\n{chunk_stream.__str__(extended=True)}") + Logger.info(f"Processing using {kwargs}") + _processed_stream = multi_process(st=chunk_stream, **kwargs) + # If data have more zeros then pre-processing will return a + # trace of 0 length + _processed_stream.traces = [ + tr for tr in _processed_stream if tr.stats.npts != 0] + if len(_processed_stream) == 0: + Logger.warning( + f"Data quality insufficient between {kwargs['starttime']}" + f" and {_endtime}") + continue + # Pre-processing does additional checks for zeros - we need to check + # again whether we actually have something useful from this. + processed_chunk_stream_lengths = [ + tr.stats.endtime - tr.stats.starttime + for tr in _processed_stream] + if min(processed_chunk_stream_lengths) >= .8 * process_length: + processed_streams.append(_processed_stream) + else: + Logger.warning( + f"Data quality insufficient between {kwargs['starttime']}" + f" and {_endtime}") + continue + + if _endtime < stream[0].stats.endtime: + Logger.warning( + "Last bit of data between {0} and {1} will go unused " + "because it is shorter than a chunk of {2} s".format( + _endtime, stream[0].stats.endtime, process_length)) + return processed_streams + + +def _quick_copy_trace(trace, deepcopy_data=True): + """ + Function to quickly copy a trace. Sets values in the traces' and trace + header's dict directly, circumventing obspy's init functions. + Speedup: from 37 us to 12 us per trace - 3x faster + + :type trace: :class:`obspy.core.trace.Trace` + :param trace: Stream to quickly copy + :type deepcopy_data: bool + :param deepcopy_data: + Whether to deepcopy trace data (with `deepcopy_data=False` expect up to + 20 % speedup, but use only when you know that data trace contents will + not change or affect results). Warning: do not use this option to copy + traces with processing history or response information. + :rtype: :class:`obspy.core.trace.Trace` + return: trace + """ + new_trace = Trace() + for key, value in trace.__dict__.items(): + if key == 'stats': + new_stats = new_trace.stats + for key_2, value_2 in value.__dict__.items(): + if isinstance(value_2, UTCDateTime): + new_stats.__dict__[key_2] = UTCDateTime( + ns=value_2.__dict__['_UTCDateTime__ns']) + else: + new_stats.__dict__[key_2] = value_2 + elif deepcopy_data: + # data needs to be deepcopied (and anything else, to be safe) + new_trace.__dict__[key] = copy.deepcopy(value) + else: # No deepcopy, e.g. for NaN-traces with no effect on results + new_trace.__dict__[key] = value + return new_trace + + +def _quick_copy_stream(stream, deepcopy_data=True): + """ + Function to quickly copy a stream. + Speedup for simple trace: + from 112 us to 44 (35) us per 3-trace stream - 2.8x (3.2x) faster + + Warning: use `deepcopy_data=False` (saves extra ~20 % time) only when the + changing the data in the stream later does not change results + (e.g., for NaN-trace or when data array will not be changed). + + This is what takes longest (1 empty trace, total time to copy 27 us): + copy header: 18 us (vs create new empty header: 683 ns) + Two points that can speed up copying / creation: + 1. 
circumvent trace.__init__ and trace.__set_attr__ by setting value + directly in trace's __dict__ + 2. when setting trace header, circumvent that Stats(header) is called + when header is already a Stats instance + + :type stream: :class:`obspy.core.stream.Stream` + :param stream: Stream to quickly copy + :type deepcopy_data: bool + :param deepcopy_data: + Whether to deepcopy data (with `deepcopy_data=False` expect up to 20 % + speedup, but use only when you know that data trace contents will not + change or affect results). + + :rtype: :class:`obspy.core.stream.Stream` + return: stream + """ + new_traces = list() + for trace in stream: + new_traces.append( + _quick_copy_trace(trace, deepcopy_data=deepcopy_data)) + return Stream(new_traces) + + +def _stream_quick_select(stream, seed_id): + """ + 4x quicker selection of traces in stream by full Seed-ID. Does not support + wildcards or selection by network/station/location/channel alone. + """ + net, sta, loc, chan = seed_id.split('.') + stream = Stream( + [tr for tr in stream + if (tr.stats.network == net and + tr.stats.station == sta and + tr.stats.location == loc and + tr.stats.channel == chan)]) + return stream + + def _prep_data_for_correlation(stream, templates, template_names=None, force_stream_epoch=True): """ @@ -789,13 +1073,17 @@ def _prep_data_for_correlation(stream, templates, template_names=None, [key.split('.') + [i] for key, value in template_ids.items() for i in range(value)]) seed_ids = [('.'.join(seed_id[0:-1]), seed_id[-1]) for seed_id in seed_ids] + Logger.info(f"Prepping for {len(seed_ids)} channels that share seed-ids " + f"between templates and stream") + Logger.debug(f"Shared seed-ids: {seed_ids}") for channel_number, seed_id in enumerate(template_ids.keys()): stream_data = np.zeros(stream_length, dtype=np.float32) stream_channel = stream.select(id=seed_id) if len(stream_channel) > 1: - raise NotImplementedError( - "Multiple channels in continuous data for {0}".format(seed_id)) + msg = f"Multiple channels in continuous data for {seed_id}" + Logger.error(msg) + raise NotImplementedError(msg) stream_channel = stream_channel[0] if stream_channel.stats.npts == stream_length: stream_data = stream_channel.data @@ -834,12 +1122,13 @@ def _prep_data_for_correlation(stream, templates, template_names=None, # Initialize nan template for speed. 
nan_channel = np.full(template_length, np.nan, dtype=np.float32) + nan_channel = np.require(nan_channel, requirements=['C_CONTIGUOUS']) nan_template = Stream() for _seed_id in seed_ids: net, sta, loc, chan = _seed_id[0].split('.') nan_template += Trace(header=Stats({ 'network': net, 'station': sta, 'location': loc, - 'channel': chan, 'starttime': UTCDateTime(), + 'channel': chan, 'starttime': UTCDateTime(ns=0), 'npts': template_length, 'sampling_rate': samp_rate})) # Remove templates with no matching channels @@ -874,8 +1163,8 @@ def _prep_data_for_correlation(stream, templates, template_names=None, net, sta, loc, chan = earliest_templ_trace_id.split('.') nan_template += Trace(header=Stats({ 'network': net, 'station': sta, 'location': loc, - 'channel': chan, 'starttime': UTCDateTime(), - 'npts': template_length, 'sampling_rate': samp_rate})) + 'channel': chan, 'starttime': UTCDateTime(ns=0), + 'sampling_rate': samp_rate})) stream_nan_data = np.full( stream_length, np.nan, dtype=np.float32) out_stream += Trace( @@ -894,16 +1183,37 @@ def _prep_data_for_correlation(stream, templates, template_names=None, for template_name in incomplete_templates: template = _out[template_name] template_starttime = min(tr.stats.starttime for tr in template) - out_template = nan_template.copy() + out_template = _quick_copy_stream(nan_template, deepcopy_data=False) + + # Select traces very quickly: assume that trace order does not change, + # make dict of trace-ids and list of indices and use indices to select + stream_trace_id_dict = defaultdict(list) + for n, tr in enumerate(template.traces): + stream_trace_id_dict[tr.id].append(n) + for channel_number, _seed_id in enumerate(seed_ids): seed_id, channel_index = _seed_id - template_channel = template.select(id=seed_id) + # Select all traces with same seed_id, based on indices for + # corresponding traces stored in stream_trace_id_dict + # Much quicker than: template_channel = template.select(id=seed_id) + template_channel = Stream([ + template.traces[idx] for idx in stream_trace_id_dict[seed_id]]) if len(template_channel) <= channel_index: - out_template[channel_number].data = nan_channel - out_template[channel_number].stats.starttime = \ - template_starttime + # out_template[channel_number].data = nan_channel # quicker: + out_template.traces[channel_number].__dict__[ + 'data'] = np.copy(nan_channel) + out_template.traces[channel_number].stats.__dict__[ + 'npts'] = template_length + out_template.traces[channel_number].stats.__dict__[ + 'starttime'] = template_starttime + out_template.traces[channel_number].stats.__dict__[ + 'endtime'] = UTCDateTime(ns=int( + round(template_starttime.ns + + (template_length / samp_rate) * 1e9))) else: - out_template[channel_number] = template_channel[channel_index] + out_template.traces[channel_number] = template_channel.traces[ + channel_index] + # If a template-trace matches a NaN-trace in the stream , then set # template-trace to NaN so that this trace does not appear in channel- # list of detections. @@ -921,6 +1231,73 @@ def _prep_data_for_correlation(stream, templates, template_names=None, return out_stream, out_templates +def shortproc(st, lowcut, highcut, filt_order, samp_rate, parallel=False, + num_cores=False, starttime=None, endtime=None, + seisan_chan_names=False, fill_gaps=True, ignore_length=False, + ignore_bad_data=False, fft_threads=1): + """ + Deprecated + """ + Logger.warning("Shortproc is depreciated after 0.4.4 and will " + "be removed in a future version. 
Use multi_process" + " instead") + st = multi_process( + st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, + samp_rate=samp_rate, parallel=parallel, num_cores=num_cores, + starttime=starttime, endtime=endtime, daylong=False, + seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, + ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) + return st + + +def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, + parallel=True, num_cores=False, ignore_length=False, + seisan_chan_names=False, fill_gaps=True, ignore_bad_data=False, + fft_threads=1): + """ + Deprecated + """ + Logger.warning("dayproc is depreciated after 0.4.4 and will be " + "removed in a future version. Use multi_process instead") + st = multi_process( + st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, + samp_rate=samp_rate, parallel=parallel, num_cores=num_cores, + starttime=starttime, endtime=None, daylong=True, + seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, + ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) + return st + + +def process(tr, lowcut, highcut, filt_order, samp_rate, + starttime=False, clip=False, length=86400, + seisan_chan_names=False, ignore_length=False, fill_gaps=True, + ignore_bad_data=False, fft_threads=1): + """ + Deprecated + """ + Logger.warning("process is depreciated after 0.4.4 and will be removed " + "in a future version. Use multi_process instead") + if length == 86400: + daylong = True + else: + daylong = False + + endtime = None + if clip: + if not starttime: + starttime = tr.stats.starttime + elif not isinstance(starttime, UTCDateTime): + starttime = UTCDateTime(starttime) + endtime = starttime + length + st = multi_process( + st=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order, + samp_rate=samp_rate, parallel=False, num_cores=1, + starttime=starttime, endtime=endtime, daylong=daylong, + seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, + ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) + return st + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/eqcorrscan/utils/src/find_peaks.c b/eqcorrscan/utils/src/find_peaks.c index 7fa4c6780..9fb4e381a 100644 --- a/eqcorrscan/utils/src/find_peaks.c +++ b/eqcorrscan/utils/src/find_peaks.c @@ -25,8 +25,8 @@ int decluster_dist_time(float *arr, long *indexes, float *distances, float dist_thresh, unsigned int *out){ // Takes a sorted array, with indexes as the time between events, and the // distances as a distance matrix sorted in the same way. - long i, j, step; - int keep, distance_index; + long i, j, step, distance_index; + int keep; if (fabs(arr[0]) < thresh){return 0;} @@ -57,8 +57,8 @@ int decluster_dist_time_ll(float *arr, long long *indexes, float *distances, float dist_thresh, unsigned int *out){ // Takes a sorted array, with indexes as the time between events, and the // distances as a distance matrix sorted in the same way. 
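The two decluster hunks above widen distance_index from int to long / long long, presumably to guard against 32-bit overflow when indexing a flattened n-by-n distance matrix for large catalogues; the indexing scheme is assumed here, not shown in this hunk. A quick illustration of the limit:

# Illustrative arithmetic only; assumes a flattened n * n distance matrix.
n = 50_000                  # number of detections to decluster
cells = n * n               # 2_500_000_000 pairwise distances
print(cells > 2**31 - 1)    # True: a 32-bit signed index would overflow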
- long long i, j, step; - int keep, distance_index; + long long i, j, step, distance_index; + int keep; if (fabs(arr[0]) < thresh){return 0;} @@ -115,28 +115,37 @@ int decluster_ll(float *arr, long long *indexes, long long len, } return 0; } - -int multi_decluster_ll(float *arr, long long *indices, - long long *lengths, int n, float *thresholds, - long long trig_int, unsigned int *out, int threads){ - int i, ret_val = 0; - long long * start_inds = (long long *) calloc(n, sizeof(long long)); - long long start_ind = 0; - - for (i = 0; i < n; ++i){ - start_inds[i] = start_ind; - start_ind += lengths[i]; - } - - #pragma omp parallel for num_threads(threads) - for (i = 0; i < n; ++i){ - ret_val += decluster_ll( - &arr[start_inds[i]], &indices[start_inds[i]], lengths[i], thresholds[i], - trig_int, &out[start_inds[i]]); - } - free(start_inds); - return ret_val; -} +// Unused - not maintained from 01/12/2023 +//int multi_decluster_ll(float *arr, long long *indices, +// long long *lengths, int n, float *thresholds, +// long long trig_int, unsigned int *out, int threads){ +// int i, ret_val = 0; +// long long * start_inds = (long long *) calloc(n, sizeof(long long)); +// long long start_ind = 0; +// +// for (i = 0; i < n; ++i){ +// start_inds[i] = start_ind; +// start_ind += lengths[i]; +// } +// +// #ifdef N_THREADS +// if (threads > N_THREADS){ +// printf("MULTI-DECLUSTER-LL: Setting threads to %i. OMP found %i threads\n", N_THREADS, omp_get_max_threads()); +// threads = N_THREADS; +// } +// #else +// threads = 1; +// #endif +// +// #pragma omp parallel for num_threads(threads) +// for (i = 0; i < n; ++i){ +// ret_val += decluster_ll( +// &arr[start_inds[i]], &indices[start_inds[i]], lengths[i], thresholds[i], +// trig_int, &out[start_inds[i]]); +// } +// free(start_inds); +// return ret_val; +//} // Functions for longs - should be the same logic as above @@ -171,27 +180,35 @@ int decluster(float *arr, long *indexes, long len, return 0; } -int multi_decluster(float *arr, long *indices, - long *lengths, int n, float *thresholds, - long trig_int, unsigned int *out, int threads){ - int i, ret_val = 0; - long * start_inds = (long *) calloc(n, sizeof(long)); - long start_ind = 0; - - for (i = 0; i < n; ++i){ - start_inds[i] = start_ind; - start_ind += lengths[i]; - } - - #pragma omp parallel for num_threads(threads) - for (i = 0; i < n; ++i){ - ret_val += decluster( - &arr[start_inds[i]], &indices[start_inds[i]], lengths[i], thresholds[i], - trig_int, &out[start_inds[i]]); - } - free(start_inds); - return ret_val; -} +// Unused - not maintained from 01/12/2023 +//int multi_decluster(float *arr, long *indices, +// long *lengths, int n, float *thresholds, +// long trig_int, unsigned int *out, int threads){ +// int i, ret_val = 0; +// long * start_inds = (long *) calloc(n, sizeof(long)); +// long start_ind = 0; +// +// for (i = 0; i < n; ++i){ +// start_inds[i] = start_ind; +// start_ind += lengths[i]; +// } +// #ifdef N_THREADS +// if (threads > N_THREADS){ +// printf("MULTI-DECLUSTER\tMore threads requested than available (%i > %i). 
Caution\n", threads, N_THREADS); +//; } +// #else +// threads = 1; +// #endif +// +// #pragma omp parallel for num_threads(threads) +// for (i = 0; i < n; ++i){ +// ret_val += decluster( +// &arr[start_inds[i]], &indices[start_inds[i]], lengths[i], thresholds[i], +// trig_int, &out[start_inds[i]]); +// } +// free(start_inds); +// return ret_val; +//} int find_peaks(float *arr, long len, float thresh, unsigned int *peak_positions){ @@ -221,24 +238,3 @@ int find_peaks(float *arr, long len, float thresh, unsigned int *peak_positions) } return 0; } - - -int multi_find_peaks(float *arr, long len, int n, float *thresholds, int threads, - unsigned int *peak_positions){ - int i, ret_val = 0; - long * start_inds = (long *) calloc(n, sizeof(long)); - long start_ind = 0; - - for (i = 0; i < n; ++i){ - start_inds[i] = start_ind; - start_ind += len; - } - - #pragma omp parallel for num_threads(threads) - for (i = 0; i < n; ++i){ - ret_val += find_peaks(&arr[start_inds[i]], len, thresholds[i], &peak_positions[start_inds[i]]); - } - - free(start_inds); - return ret_val; -} \ No newline at end of file diff --git a/eqcorrscan/utils/src/libutils.def b/eqcorrscan/utils/src/libutils.def index cf7bcc5da..412a02d2d 100644 --- a/eqcorrscan/utils/src/libutils.def +++ b/eqcorrscan/utils/src/libutils.def @@ -1,13 +1,10 @@ LIBRARY libutils.pyd EXPORTS find_peaks - multi_find_peaks decluster decluster_ll decluster_dist_time decluster_dist_time_ll - multi_decluster - multi_decluster_ll normxcorr_fftw normxcorr_fftw_threaded normxcorr_time diff --git a/eqcorrscan/utils/src/libutils.h b/eqcorrscan/utils/src/libutils.h index e076c9460..52a084561 100644 --- a/eqcorrscan/utils/src/libutils.h +++ b/eqcorrscan/utils/src/libutils.h @@ -58,27 +58,18 @@ int decluster_dist_time(float*, long*, float*, long, float, long, float, unsigne int decluster_ll(float*, long long*, long long, float, long long, unsigned int*); -int multi_decluster_ll(float*, long long*, long long*, int, float*, long long, unsigned int*, int); - int decluster(float*, long*, long, float, long, unsigned int*); -int multi_decluster(float*, long*, long*, int, float*, long, unsigned int*, int); - int findpeaks(float*, long, float, unsigned int*); -int multi_find_peaks(float*, long, int, float*, int, unsigned int*); - // multi_corr functions +int running_mean_var(double*, double*, int*, float*, long, long); + int normxcorr_fftw_main(float*, long, long, float*, long, int, int, float*, long, float*, float*, float*, fftwf_complex*, fftwf_complex*, fftwf_complex*, fftwf_plan, fftwf_plan, fftwf_plan, int*, int*, int, int*, int*, int); -int normxcorr_fftw_internal( - long, long, float*, long, int, int, float*, long, long, float*, float*, float*, - float*, fftwf_complex*, fftwf_complex*, fftwf_complex*, fftwf_plan, - fftwf_plan, int*, int*, int, int*, int*, int, long); - int normxcorr_fftw_threaded( float*, long, long, float*, long, float*, long, int*, int*, int*, int*); @@ -92,7 +83,7 @@ void free_fftw_arrays( int multi_normxcorr_fftw( float*, long, long, long, float*, long, float*, long, int*, int*, int, - int*, int*, int); + int, int*, int*, int); int normxcorr_fftw(float*, long, long, float*, long, float*, long, int*, int*, int*, int*); diff --git a/eqcorrscan/utils/src/multi_corr.c b/eqcorrscan/utils/src/multi_corr.c index 426d5707c..5eca6261b 100644 --- a/eqcorrscan/utils/src/multi_corr.c +++ b/eqcorrscan/utils/src/multi_corr.c @@ -278,9 +278,13 @@ int normxcorr_fftw_main( missed_corr: Pointer to array to store warnings for unused correlations stack_option: Whether 
to stack correlograms (1) or leave as individual channels (0), */ - long i, t, chunk, n_chunks, chunk_len, startind, step_len; - int status = 0; +// double tic, toc, super_tic, super_toc; + long i, t, chunk, n_chunks, chunk_len, startind = template_len - 1, offset, step_len; + int status = 0, N2 = fft_len / 2 + 1, unused_corr = 0; float * norm_sums = (float *) calloc(n_templates, sizeof(float)); + int * flatline_count = (int *) calloc(image_len - template_len + 1, sizeof(int)); + double * mean = (double*) malloc((image_len - template_len + 1) * sizeof(double)); + double * var = (double*) malloc((image_len - template_len + 1) * sizeof(double)); if (norm_sums == NULL) { printf("ERROR: Error allocating norm_sums in normxcorr_fftw_main\n"); @@ -302,7 +306,10 @@ int normxcorr_fftw_main( } // Compute fft of template +// tic = omp_get_wtime(); fftwf_execute_dft_r2c(pa, template_ext, outa); +// toc = omp_get_wtime(); +// printf("Template ffts took \t\t%f s\n", toc - tic); if (fft_len >= image_len){ n_chunks = 1; @@ -314,109 +321,112 @@ int normxcorr_fftw_main( n_chunks = (image_len - chunk_len) / step_len + ((image_len - chunk_len) % step_len > 0); if (n_chunks * step_len < image_len){n_chunks += 1;} } + + // Procedures for normalisation + // TODO: Run this as a parallel section +// tic = omp_get_wtime(); + running_mean_var(mean, var, flatline_count, image, image_len, template_len); +// toc = omp_get_wtime(); +// printf("Running mean took \t\t%f s\n", toc - tic); + +// super_tic = omp_get_wtime(); for (chunk = 0; chunk < n_chunks; ++chunk){ - startind = chunk * step_len; - if (startind + chunk_len > image_len){ - chunk_len = image_len - startind;} + offset = chunk * step_len; + if (offset + chunk_len > image_len){ + chunk_len = image_len - offset;} memset(image_ext, 0, (size_t) fft_len * sizeof(float)); - for (i = 0; i < chunk_len; ++i){image_ext[i] = image[startind + i];} - status += normxcorr_fftw_internal( - template_len, n_templates, &image[startind], chunk_len, chan, - n_chans, &ncc[0], image_len, fft_len, template_ext, - image_ext, norm_sums, ccc, outa, outb, out, pb, px, used_chans, - pad_array, num_threads, variance_warning, missed_corr, - stack_option, startind); + for (i = 0; i < chunk_len; ++i){image_ext[i] = image[offset + i];} + + // Forward FFT +// tic = omp_get_wtime(); + fftwf_execute_dft_r2c(pb, image_ext, outb); +// toc = omp_get_wtime(); +// printf("Chunk FFT took \t\t%f s\n", toc - tic); + + // Dot product +// tic = omp_get_wtime(); + #pragma omp parallel for num_threads(num_threads) private(i) + for (t = 0; t < n_templates; ++t){ + for (i = 0; i < N2; ++i) + { + out[(t * N2) + i][0] = outa[(t * N2) + i][0] * outb[i][0] - outa[(t * N2) + i][1] * outb[i][1]; + out[(t * N2) + i][1] = outa[(t * N2) + i][0] * outb[i][1] + outa[(t * N2) + i][1] * outb[i][0]; + } + } +// toc = omp_get_wtime(); +// printf("Dot product took \t\t%f s\n", toc - tic); + + // Compute inverse fft +// tic = omp_get_wtime(); + fftwf_execute_dft_c2r(px, out, ccc); +// toc = omp_get_wtime(); +// printf("Inverse FFT took \t\t%f s\n", toc - tic); + + // Centre and normalise + +// tic = omp_get_wtime(); + if (var[offset] >= ACCEPTED_DIFF) { + double stdev = sqrt(var[offset]); + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean[offset]); + c /= stdev; + status += set_ncc(t, offset, chan, n_chans, template_len, image_len, + (float) c, used_chans, pad_array, ncc, stack_option); + } + if (var[offset] <= WARN_DIFF){ + variance_warning[0] = 
1; + } + } else { + unused_corr += 1; + } + + // Center and divide by length to generate scaled convolution + #pragma omp parallel for reduction(+:status,unused_corr) num_threads(num_threads) private(t) + for(i = 1; i < (chunk_len - template_len + 1); ++i){ + if (var[offset + i] >= ACCEPTED_DIFF && flatline_count[offset + i] < template_len - 1) { + double stdev = sqrt(var[offset + i]); + double meanstd = fabs(mean[offset + i] * stdev); + if (meanstd >= ACCEPTED_DIFF){ + for (t = 0; t < n_templates; ++t){ + double c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean[offset + i]); + c /= stdev; + status += set_ncc(t, i + offset, chan, n_chans, template_len, + image_len, (float) c, used_chans, + pad_array, ncc, stack_option); + } + } + else { + unused_corr += 1; + } + if (var[offset + i] <= WARN_DIFF){ + variance_warning[0] += 1; + } + } else { + unused_corr += 1; + } + } + missed_corr[0] += unused_corr; +// toc = omp_get_wtime(); +// printf("Normalising took \t\t%f s\n", toc - tic); } +// super_toc = omp_get_wtime(); +// printf("Looping over chunks took \t\t%f s\n", super_toc - super_tic); + free(mean); + free(var); + free(flatline_count); free(norm_sums); return status; } -int normxcorr_fftw_internal( - long template_len, long n_templates, float *image, long image_len, - int chan, int n_chans, float *ncc, long ncc_len, long fft_len, - float *template_ext, float *image_ext, float *norm_sums, float *ccc, - fftwf_complex *outa, fftwf_complex *outb, fftwf_complex *out, - fftwf_plan pb, fftwf_plan px, int *used_chans, int *pad_array, - int num_threads, int *variance_warning, int *missed_corr, - int stack_option, long offset) -{ - /* - Internal function for chunking cross-correlations - template_len: Length of template - n_templates: Number of templates - image: Image signal (to scan through) - in this case this is a pointer to the starting index of the - image for this chunk - image_len: Length of image chunk (not complete length of image) - chan: Channel number - used for stacking, otherwise set to 0 - n_chans: Number of channels - used for stacking, otherwise set to 1 - ncc: Output for cross-correlation - should be pointer to memory. This should be the whole ncc, not just - the ncc starting at this chunk because padding requires negative indexing. - Shapes and output determined by stack_option: - 1: Output stack correlograms, ncc must be - (n_templates x image_len - template_len + 1) long. - 0: Output individual channel correlograms, ncc must be - (n_templates x image_len - template_len + 1) long and initialised - to zero before passing into this function. - ncc_len: Total length of the ncc (not just the chunk). 
- fft_len: Size for fft - template_ext: Input FFTW array for template transform (must be allocated) - image_ext: Input FFTW array for image transform (must be allocated) - norm_sums: Normalised, summed templates - ccc: Output FFTW array for reverse transform (must be allocated) - outa: Output FFTW array for template transform (must be computed) - outb: Output FFTW array for image transform (must be allocated) - out: Input array for reverse transform (must be allocated) - pb: Forward plan for image - px: Reverse plan - used_chans: Array to fill with number of channels used per template - must - be n_templates long - pad_array: Array of pads, should be n_templates long - num_threads: Number of threads to parallel internal calculations over - variance_warning: Pointer to array to store warnings for variance issues - missed_corr: Pointer to array to store warnings for unused correlations - stack_option: Whether to stacked correlograms (1) or leave as individual channels (0), - offset: Offset for position of chunk in ncc (for a pad of zero). - */ - long i, t, startind; - long N2 = fft_len / 2 + 1; - int status = 0, unused_corr = 0; - int * flatline_count = (int *) calloc(image_len - template_len + 1, sizeof(int)); - double *mean, *var; - double new_samp, old_samp, sum=0.0; - // Compute fft of image - fftwf_execute_dft_r2c(pb, image_ext, outb); - - // Compute dot product - #pragma omp parallel for num_threads(num_threads) private(i) - for (t = 0; t < n_templates; ++t){ - for (i = 0; i < N2; ++i) - { - out[(t * N2) + i][0] = outa[(t * N2) + i][0] * outb[i][0] - outa[(t * N2) + i][1] * outb[i][1]; - out[(t * N2) + i][1] = outa[(t * N2) + i][0] * outb[i][1] + outa[(t * N2) + i][1] * outb[i][0]; - } - } +int running_mean_var( + double *mean, double *var, int *flatline_count, float *image, long image_len, + long template_len) +{ + long i; + double sum, new_samp, old_samp; - // Compute inverse fft - fftwf_execute_dft_c2r(px, out, ccc); - - // Allocate mean and var arrays - mean = (double*) malloc((image_len - template_len + 1) * sizeof(double)); - if (mean == NULL) { - printf("ERROR: Error allocating mean in normxcorr_fftw_internal\n"); - free(norm_sums); - return 1; - } - var = (double*) malloc((image_len - template_len + 1) * sizeof(double)); - if (var == NULL) { - printf("ERROR: Error allocating var in normxcorr_fftw_internal\n"); - free(norm_sums); - free(mean); - return 1; - } - - // Procedures for normalisation // Compute starting mean, will update this sum = 0.0; for (i=0; i < template_len; ++i){ @@ -431,22 +441,6 @@ int normxcorr_fftw_internal( } var[0] = sum; - // Used for centering - taking only the valid part of the cross-correlation - startind = template_len - 1; - if (var[0] >= ACCEPTED_DIFF) { - double stdev = sqrt(var[0]); - for (t = 0; t < n_templates; ++t){ - double c = ((ccc[(t * fft_len) + startind] / (fft_len * n_templates)) - norm_sums[t] * mean[0]); - c /= stdev; - status += set_ncc(t, offset, chan, n_chans, template_len, ncc_len, - (float) c, used_chans, pad_array, ncc, stack_option); - } - if (var[0] <= WARN_DIFF){ - variance_warning[0] = 1; - } - } else { - unused_corr += 1; - } // pre-compute the mean and var so we can parallelise the calculation for(i = 1; i < (image_len - template_len + 1); ++i){ @@ -463,39 +457,7 @@ int normxcorr_fftw_internal( flatline_count[i] = 0; } } - - // Center and divide by length to generate scaled convolution - #pragma omp parallel for reduction(+:status,unused_corr) num_threads(num_threads) private(t) - for(i = 1; i < (image_len - template_len + 
1); ++i){ - if (var[i] >= ACCEPTED_DIFF && flatline_count[i] < template_len - 1) { - double stdev = sqrt(var[i]); - double meanstd = fabs(mean[i] * stdev); - if (meanstd >= ACCEPTED_DIFF){ - for (t = 0; t < n_templates; ++t){ - double c = ((ccc[(t * fft_len) + i + startind] / (fft_len * n_templates)) - norm_sums[t] * mean[i]); - c /= stdev; - status += set_ncc(t, i + offset, chan, n_chans, template_len, - ncc_len, (float) c, used_chans, - pad_array, ncc, stack_option); - } - } - else { - unused_corr += 1; - } - if (var[i] <= WARN_DIFF){ - variance_warning[0] += 1; - } - } else { - unused_corr += 1; - } - } - missed_corr[0] += unused_corr; - - // Clean up - free(mean); - free(var); - free(flatline_count); - return status; + return 0; } static inline int set_ncc( @@ -578,10 +540,10 @@ void free_fftw_arrays(int size, double **template_ext, double **image_ext, doubl int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, long n_channels, float *image, long image_len, float *ncc, long fft_len, int *used_chans, - int *pad_array, int num_threads_inner, int *variance_warning, int *missed_corr, - int stack_option) + int *pad_array, int num_threads_inner, int num_threads_outer, + int *variance_warning, int *missed_corr, int stack_option) { - int i, chan, n_chans, num_threads_outer=1; + int i, chan, n_chans; int r = 0; size_t N2 = (size_t) fft_len / 2 + 1; float **template_ext = NULL; @@ -597,6 +559,12 @@ int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, /* num_threads_outer cannot be greater than the number of channels */ num_threads_outer = (num_threads_outer > n_channels) ? n_channels : num_threads_outer; + // Ensure stack-option is within supported range + if (stack_option > 1){ + printf("Stack option %i unsupported, returning\n", stack_option); + return -1; + } + /* Outer loop parallelism seems to cause issues on OSX */ if (OUTER_SAFE != 1 && num_threads_outer > 1){ printf("WARNING\tMULTI_NORMXCORR_FFTW\tOuter loop threading disabled for this system\n"); @@ -617,6 +585,7 @@ int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, /* warn if the total number of threads is higher than the number of cores */ if (num_threads_outer * num_threads_inner > N_THREADS) { + printf("WARNING: Requested %d inner and %d outer = %d total, but %d are available\n", num_threads_inner, num_threads_outer, num_threads_outer * num_threads_inner, N_THREADS); printf("WARNING: requesting more threads than available - this could negatively impact performance\n"); } #else @@ -624,6 +593,7 @@ int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, num_threads_outer = 1; num_threads_inner = 1; #endif +// printf("Using %d outer threads and %d inner threads\n", num_threads_outer, num_threads_inner); /* Check that stack-type is within range (0-1) */ if (stack_option > 1) { @@ -734,7 +704,7 @@ int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, px = fftwf_plan_dft_c2r_2d(n_templates, fft_len, out[0], ccc[0], FFTW_ESTIMATE); /* loop over the channels */ - /* #pragma omp parallel for num_threads(num_threads_outer) */ + #pragma omp parallel for num_threads(num_threads_outer) for (i = 0; i < n_channels; ++i){ int tid = 0; /* each thread has its own workspace */ @@ -769,23 +739,6 @@ int multi_normxcorr_fftw(float *templates, long n_templates, long template_len, for (i = 0; i < n_channels; ++i){ r += results[i]; } -// if (results[i] != 999 && results[i] != 0){ -// // Some error internally, must catch this 
-// r += results[i]; -// } else if (results[i] == 999 && r == 0){ -// // First time unused correlation raised and no prior errors -// r = results[i]; -// } else if (r == 999 && results[i] == 999){ -// // Unused correlations raised multiple times -// r = 999; -// } else if (r == 999 && results[i] != 999){ -// // Some error internally. -// r += results[i]; -// } else if (r != 0){ -// // Any other error -// r += results[i]; -// } -// } free(results); /* free fftw memory */ free_fftwf_arrays(num_threads_outer, template_ext, image_ext, ccc, outa, outb, out); diff --git a/pytest.ini b/pytest.ini index 8919a49e0..0d8cabaf2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,8 @@ [pytest] norecursedirs=eqcorrscan/doc .* eqcorrscan/scripts build pyasdf eqcorrscan/utils/lib eqcorrscan/utils/src addopts = --cov=eqcorrscan --cov-config .coveragerc --ignore=setup.py --doctest-modules -p no:warnings +log_format = %(asctime)s %(name)s %(levelname)s %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +log_cli_level = INFO +log_cli_format = %(asctime)s %(name)s %(levelname)s %(message)s +log_cli_date_format = %Y-%m-%d %H:%M:%S \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ab086ebd6..57e51617b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ matplotlib>=1.3.0 scipy>=0.18,<1.9.0 # Pinned due to scipy/obspy hanning renaming bottleneck obspy>=1.3.0 # ObsPy <1.3.0 is incompatible with numpy >= 1.22: https://github.com/obspy/obspy/issues/2912 -pyfftw # PyFFTW 0.13 on conda has a build issue: https://github.com/conda-forge/pyfftw-feedstock/issues/51 h5py pytest>=2.0.0 pytest-cov diff --git a/setup.py b/setup.py index 4273d8482..197f24875 100644 --- a/setup.py +++ b/setup.py @@ -385,8 +385,9 @@ def setup_package(): 'Topic :: Scientific/Engineering', 'License :: OSI Approved :: GNU Library or Lesser General Public ' 'License (LGPL)', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', ], 'keywords': 'earthquake correlation detection match-filter', 'scripts': scriptfiles, @@ -398,7 +399,8 @@ def setup_package(): 'cmdclass': {'build_ext': CustomBuildExt}, 'packages': [ 'eqcorrscan', 'eqcorrscan.utils', 'eqcorrscan.core', - 'eqcorrscan.core.match_filter', 'eqcorrscan.utils.lib', + 'eqcorrscan.core.match_filter', + 'eqcorrscan.core.match_filter.helpers', 'eqcorrscan.utils.lib', 'eqcorrscan.tutorials', 'eqcorrscan.helpers', 'eqcorrscan.tests'], }
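The patch removes multi_find_peaks (together with the multi_decluster variants) from libutils.def and libutils.h, leaving only the single-trace find_peaks export. A caller that previously relied on the batched entry point would now drive find_peaks over its own segments. The sketch below is illustrative only and not part of the patch: it assumes n contiguous, equal-length segments laid out exactly as the deleted wrapper assumed, the helper name find_peaks_batched is hypothetical, and a reduction clause is added so the accumulated return value is race-free.

/* Illustrative sketch, not part of this patch: re-creates the behaviour of the
 * deleted multi_find_peaks on top of the surviving find_peaks export.
 * Assumes n contiguous, equal-length segments. */
int find_peaks(float *arr, long len, float thresh, unsigned int *peak_positions);

int find_peaks_batched(float *arr, long seg_len, int n_segments,
                       float *thresholds, unsigned int *peak_positions)
{
    int i, ret_val = 0;

    /* Segment i starts at i * seg_len in both the data and the output array,
     * which is what the deleted wrapper pre-computed into start_inds. */
    #pragma omp parallel for reduction(+:ret_val)
    for (i = 0; i < n_segments; ++i){
        ret_val += find_peaks(&arr[i * seg_len], seg_len, thresholds[i],
                              &peak_positions[i * seg_len]);
    }
    return ret_val;
}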
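In the reworked normxcorr_fftw_main the former normxcorr_fftw_internal is inlined: each chunk runs the forward FFT of the image window, the complex dot product against the pre-computed template spectra, the unscaled inverse FFT, and then a per-lag rescaling into a normalised correlation. That rescaling, as it appears in the added loop, is restated below as a standalone sketch; normalise_lag is an illustrative name, ccc_val stands for one raw inverse-FFT sample, and the sketch assumes norm_sums[t], mean[] and var[] are already populated and that the caller has checked var against ACCEPTED_DIFF, as the patched loop does.

#include <math.h>

/* Illustrative restatement of the per-lag normalisation in the patched
 * normxcorr_fftw_main.  FFTW's c2r transform is unscaled, and the reverse
 * plan is a 2-D plan over (n_templates x fft_len), hence the division by
 * fft_len * n_templates. */
static float normalise_lag(double ccc_val, long fft_len, long n_templates,
                           float norm_sum, double window_mean, double window_var)
{
    double c = ccc_val / (double)(fft_len * n_templates); /* undo FFT scaling */
    c -= norm_sum * window_mean;  /* remove the contribution of the window mean */
    c /= sqrt(window_var);        /* divide by sqrt(var), as the added loop does */
    return (float) c;
}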
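The window statistics themselves now come from the new running_mean_var helper, computed once per image before the chunk loop rather than once per chunk inside the old internal function. Only the first window (mean[0], var[0]) is visible in this hunk, so the sketch below fills the update step with the standard sliding-window recurrence; it is an assumption that the real helper uses the same recurrence, and it deliberately omits the flatline_count bookkeeping and any scaling the real helper applies to var.

/* Sliding-window mean/variance sketch using the textbook one-in, one-out
 * recurrence; names and scaling are illustrative, not taken from
 * running_mean_var itself. */
void sliding_mean_var(const float *image, long image_len, long win_len,
                      double *mean, double *var)
{
    long i, n_windows = image_len - win_len + 1;
    double sum = 0.0, m2 = 0.0;

    /* First window: plain mean and sum of squared deviations. */
    for (i = 0; i < win_len; ++i){ sum += image[i]; }
    mean[0] = sum / win_len;
    for (i = 0; i < win_len; ++i){
        double d = image[i] - mean[0];
        m2 += d * d;
    }
    var[0] = m2;

    /* Each later window differs from the previous one by a single sample in
     * and a single sample out, so mean and M2 update in O(1) per window. */
    for (i = 1; i < n_windows; ++i){
        double new_samp = image[i + win_len - 1];
        double old_samp = image[i - 1];
        mean[i] = mean[i - 1] + (new_samp - old_samp) / win_len;
        m2 += (new_samp - old_samp) * (new_samp - mean[i] + old_samp - mean[i - 1]);
        var[i] = m2;
    }
}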
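multi_normxcorr_fftw now takes num_threads_outer as a parameter instead of hard-coding it to 1, re-enables the outer OpenMP loop over channels, clamps the outer count to n_channels, and warns when inner times outer exceeds N_THREADS. How a caller splits its thread budget is not part of this hunk; the helper below is a hypothetical sketch of one plausible policy (fill the outer loop first, give the remainder to the inner loop) and is not code from EQcorrscan.

/* Hypothetical helper, not from the patch: split a total thread budget
 * between the outer (per-channel) and inner (per-correlation) loops of
 * multi_normxcorr_fftw.  The clamp to n_channels mirrors what the C function
 * itself enforces; the outer-first policy is just one reasonable choice. */
static void split_threads(int total, int n_channels,
                          int *num_threads_outer, int *num_threads_inner)
{
    int outer = (total < n_channels) ? total : n_channels;
    if (outer < 1) outer = 1;
    *num_threads_outer = outer;
    *num_threads_inner = (total / outer > 0) ? total / outer : 1;
}

For example, a 16-core host with 6 channels would get 6 outer x 2 inner threads, which stays inside the budget checked by the added warning.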