diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 124d2be0..31698710 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,6 +3,7 @@ name: Build container image on: release: branches: [ master ] + workflow_dispatch: jobs: build: diff --git a/.github/workflows/testdev.yml b/.github/workflows/testdev.yml index 2c406f0b..58c94776 100644 --- a/.github/workflows/testdev.yml +++ b/.github/workflows/testdev.yml @@ -9,32 +9,30 @@ on: workflow_dispatch: jobs: - build: - runs-on: ubuntu-20.04 + test: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - - name: checkout files in repo - uses: actions/checkout@v2 + - name: Checkout files in repo + uses: actions/checkout@v3 - - name: Setup Python 3.9 - uses: actions/setup-python@v2 + - name: Setup Miniconda + uses: conda-incubator/setup-miniconda@v2 with: - python-version: 3.9 + activate-environment: pyleo + environment-file: environment.yml + python-version: "3.10" + auto-activate-base: false - - name: Install dependencies + - name: Conda list run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - $CONDA/bin/conda env update --file environment.yml --name base - - - name: Lint with flake - run: | - $CONDA/bin/conda install flake8 - # stop the build if there are Python syntax errors or undefined names - $CONDA/bin/flake8 . --count --select=E9,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - $CONDA/bin/flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + conda activate pyleo + conda list - name: Test with pytest run: | - conda install pytest - $CONDA/bin/pytest + conda activate pyleo + pytest pyleoclim/tests -svv diff --git a/.github/workflows/testmaster.yml b/.github/workflows/testmaster.yml index 1aa65164..d3d180cc 100644 --- a/.github/workflows/testmaster.yml +++ b/.github/workflows/testmaster.yml @@ -9,32 +9,29 @@ on: workflow_dispatch: jobs: - build: - runs-on: ubuntu-20.04 + test: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - - name: checkout files in repo - uses: actions/checkout@v2 + - name: Checkout files in repo + uses: actions/checkout@v3 - - name: Setup Python 3.9 - uses: actions/setup-python@v2 + - name: Setup Miniconda + uses: conda-incubator/setup-miniconda@v2 with: - python-version: 3.9 + activate-environment: pyleo + environment-file: environment.yml + python-version: "3.10" + auto-activate-base: false - - name: Install dependencies + - name: Conda list run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - $CONDA/bin/conda env update --file environment.yml --name base - - - name: Lint with flake - run: | - $CONDA/bin/conda install flake8 - # stop the build if there are Python syntax errors or undefined names - $CONDA/bin/flake8 . --count --select=E9,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - $CONDA/bin/flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - + conda activate pyleo + conda list - name: Test with pytest run: | - conda install pytest - $CONDA/bin/pytest + conda activate pyleo + pytest pyleoclim/tests -svv diff --git a/doc_build/anaconda_install.rst b/doc_build/anaconda_install.rst index 349ba274..bd648176 100644 --- a/doc_build/anaconda_install.rst +++ b/doc_build/anaconda_install.rst @@ -4,7 +4,7 @@ Installing Miniconda (Mac, Linux) ================================= Anaconda is a very large install, containing not only Python, but also R, and a host of other things that are not necessary to run pyleoclim. -Users may find it preferable to install the more minimalist "miniconda" package. +Users may find it preferable to install the minimalist "miniconda" package. Step 1: Download the installation script for miniconda3 """""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -42,6 +42,6 @@ Step 3: Test your Installation .. code-block:: bash - source ~/.bashrc # assume you are using Bash shell + source ~/.bashrc # assumes you are using Bash shell which python # should return a path under which conda # should return a path under diff --git a/doc_build/citation.rst b/doc_build/citation.rst index 5bdfd62e..dff6d8e1 100644 --- a/doc_build/citation.rst +++ b/doc_build/citation.rst @@ -3,7 +3,7 @@ Citing Pyleoclim ================ -If Pyleoclim played an important part in your research, please add us to your reference list by using one of the citations below: +If Pyleoclim played any role in your research, please add us to your reference list by using at least one of the citations below (two if you're really grateful): BibTeX entry """"""""""""" @@ -13,16 +13,26 @@ For example (please check for version updates on Zenodo) @misc{Pyleoclim, DOI= {10.5281/zenodo.1205661}, url = {https://doi.org/10.5281/zenodo.1205661}, - year = {2021}, - type = {Computer Program} + year = {2022}, + type = {Computer Program}, title = {Pyleoclim: A Python 
package for the analysis of paleoclimate data}, authors = {Deborah Khider and Feng Zhu and Julien Emile-Geay and Jun Hu and Alexander James and Pratheek Athreya and Myron Kwan and Daniel Garijo} } +@article{Pyleoclim:PP2022, + author = {Khider, Deborah and Emile-Geay, Julien and Zhu, Feng and James, Alexander and Landers, Jordan and Ratnakar, Varun and Gil, Yolanda}, + journal = {Paleoceanography and Paleoclimatology}, + number = {10}, + pages = {e2022PA004509}, + title = {Pyleoclim: Paleoclimate Timeseries Analysis and Visualization With Python}, + volume = {37}, + year = {2022}, + doi = {10.1029/2022PA004509}, +} + + AGU style entry """"""""""""""" -Khider, D. et al (2021). Pyleoclim: A Python package for the analysis of paleoclimate data. Retrieved from https://doi.org/10.5281/zenodo.1205661 - -A manuscript describing Pyleoclim is currently in preprint on the `Earth and Space Science Open Archive `_. +Khider, D., Emile-Geay, J., Zhu, F., James, A., Landers, J., Ratnakar, V., & Gil, Y. (2022). Pyleoclim: Paleoclimate timeseries analysis and visualization with Python. Paleoceanography and Paleoclimatology, 37, e2022PA004509. doi:10.1029/2022PA004509 diff --git a/doc_build/installation.rst b/doc_build/installation.rst index 7fc55d4f..c4bbdb9f 100644 --- a/doc_build/installation.rst +++ b/doc_build/installation.rst @@ -2,7 +2,7 @@ .. note:: - Pyleoclim requires the use of Python 3.8 or 3.9. + Pyleoclim requires the use of Python 3.9 or above Installing Pyleoclim ==================== @@ -20,11 +20,11 @@ Click :ref:`here ` for a quick tutorial on MacOs and Linu Creating a new conda environment """"""""""""""""""""""""""""""""""" -To create a new environment using Python 3.9 via command line: +To create a new environment using Python 3.10 via command line: .. code-block:: bash - conda create -n pyleo python=3.9 + conda create -n pyleo python=3.10 To view a list of available environment: @@ -60,8 +60,9 @@ First install Cartopy: .. 
code-block:: bash - conda install -c conda-forge cartopy + conda install -c conda-forge cartopy=0.21 +(note: under Python 3.10, cartopy>=0.21 will be installed by default, so specifying this version is unnecessary). Then install Pyleoclim through Pypi, which contains the most stable version of Pyleoclim: .. code-block:: bash @@ -121,10 +122,18 @@ Again, unless you are planning to make heavy use of the WWZ functionality, we re Docker Container """""""""""""""" -Docker containers with various versions of Pyleoclim are available at: `https://quay.io/repository/2i2c/paleohack-2021?tab=tags `_. +Docker containers with various versions of Pyleoclim are available `on quay.io `_. To pull an image: .. code-block:: bash - docker pull quay.io/2i2c/paleohack-2021:latest + docker pull quay.io/linkedearth/pyleoclim:latest + +To run the image: + +.. code-block:: bash + + docker run -it -p 8888:8888 quay.io/linkedearth/pyleoclim:latest + +The container will start a Jupyter server automatically. You need to copy the link to the server (localhost) into your web browser on your machine (the command -p 8888:8888 opens the communication port between your machine and the container). You can then create notebooks and upload notebooks and data using the Jupyter interface. Remember that the container will not save any of your work if you close it. So make sure you download your work before closing the container. 
diff --git a/doc_build/rtd_env.yml b/doc_build/rtd_env.yml index 5b4c27a0..3a148453 100644 --- a/doc_build/rtd_env.yml +++ b/doc_build/rtd_env.yml @@ -3,9 +3,9 @@ channels: - conda-forge - defaults dependencies: - - python=3.9 + - python=3.10 - numpy - - cartopy>=0.19.0 + - cartopy=0.21 - pip: - sphinx - pylint @@ -14,4 +14,4 @@ dependencies: - nbsphinx - IPython - readthedocs-sphinx-search>=0.1.0 - - pyleoclim==0.9.1 + - git+https://github.com/LinkedEarth/Pyleoclim_util.git@Development diff --git a/doc_build/savefig/coh.png b/doc_build/savefig/coh.png deleted file mode 100644 index ea026a62..00000000 Binary files a/doc_build/savefig/coh.png and /dev/null differ diff --git a/doc_build/savefig/mapallarchive.png b/doc_build/savefig/mapallarchive.png deleted file mode 100644 index ba367704..00000000 Binary files a/doc_build/savefig/mapallarchive.png and /dev/null differ diff --git a/doc_build/savefig/mapallarchive_marker.png b/doc_build/savefig/mapallarchive_marker.png deleted file mode 100644 index efcea71a..00000000 Binary files a/doc_build/savefig/mapallarchive_marker.png and /dev/null differ diff --git a/doc_build/savefig/mapone.png b/doc_build/savefig/mapone.png deleted file mode 100644 index 330f16ca..00000000 Binary files a/doc_build/savefig/mapone.png and /dev/null differ diff --git a/doc_build/savefig/mts_stackplot.png b/doc_build/savefig/mts_stackplot.png deleted file mode 100644 index 6f232388..00000000 Binary files a/doc_build/savefig/mts_stackplot.png and /dev/null differ diff --git a/doc_build/savefig/random_series.png b/doc_build/savefig/random_series.png deleted file mode 100644 index 38183686..00000000 Binary files a/doc_build/savefig/random_series.png and /dev/null differ diff --git a/doc_build/savefig/spec_ls.png b/doc_build/savefig/spec_ls.png deleted file mode 100644 index 8ecd8094..00000000 Binary files a/doc_build/savefig/spec_ls.png and /dev/null differ diff --git a/doc_build/savefig/spec_ls_freq.png b/doc_build/savefig/spec_ls_freq.png deleted 
file mode 100644 index 287decaa..00000000 Binary files a/doc_build/savefig/spec_ls_freq.png and /dev/null differ diff --git a/doc_build/savefig/spec_ls_n50.png b/doc_build/savefig/spec_ls_n50.png deleted file mode 100644 index 853f9dc0..00000000 Binary files a/doc_build/savefig/spec_ls_n50.png and /dev/null differ diff --git a/doc_build/savefig/spec_mtm.png b/doc_build/savefig/spec_mtm.png deleted file mode 100644 index a88d465b..00000000 Binary files a/doc_build/savefig/spec_mtm.png and /dev/null differ diff --git a/doc_build/savefig/spec_perio.png b/doc_build/savefig/spec_perio.png deleted file mode 100644 index 08d1ddfc..00000000 Binary files a/doc_build/savefig/spec_perio.png and /dev/null differ diff --git a/doc_build/savefig/spec_welch.png b/doc_build/savefig/spec_welch.png deleted file mode 100644 index 1da133c3..00000000 Binary files a/doc_build/savefig/spec_welch.png and /dev/null differ diff --git a/doc_build/savefig/spec_wwz.png b/doc_build/savefig/spec_wwz.png deleted file mode 100644 index 49a7254f..00000000 Binary files a/doc_build/savefig/spec_wwz.png and /dev/null differ diff --git a/doc_build/savefig/ssa_recon.png b/doc_build/savefig/ssa_recon.png deleted file mode 100644 index 8e2b8d7e..00000000 Binary files a/doc_build/savefig/ssa_recon.png and /dev/null differ diff --git a/doc_build/savefig/ts_air.png b/doc_build/savefig/ts_air.png deleted file mode 100644 index aec9e7e2..00000000 Binary files a/doc_build/savefig/ts_air.png and /dev/null differ diff --git a/doc_build/savefig/ts_dashboard.png b/doc_build/savefig/ts_dashboard.png deleted file mode 100644 index 4bb38de1..00000000 Binary files a/doc_build/savefig/ts_dashboard.png and /dev/null differ diff --git a/doc_build/savefig/ts_dist.png b/doc_build/savefig/ts_dist.png deleted file mode 100644 index 94e8d6e7..00000000 Binary files a/doc_build/savefig/ts_dist.png and /dev/null differ diff --git a/doc_build/savefig/ts_filter1.png b/doc_build/savefig/ts_filter1.png deleted file mode 100644 index 
56377207..00000000 Binary files a/doc_build/savefig/ts_filter1.png and /dev/null differ diff --git a/doc_build/savefig/ts_filter2.png b/doc_build/savefig/ts_filter2.png deleted file mode 100644 index 503efd1c..00000000 Binary files a/doc_build/savefig/ts_filter2.png and /dev/null differ diff --git a/doc_build/savefig/ts_filter3.png b/doc_build/savefig/ts_filter3.png deleted file mode 100644 index 96c1f6f6..00000000 Binary files a/doc_build/savefig/ts_filter3.png and /dev/null differ diff --git a/doc_build/savefig/ts_filter4.png b/doc_build/savefig/ts_filter4.png deleted file mode 100644 index 35b70cea..00000000 Binary files a/doc_build/savefig/ts_filter4.png and /dev/null differ diff --git a/doc_build/savefig/ts_filter5.png b/doc_build/savefig/ts_filter5.png deleted file mode 100644 index 53b0dafd..00000000 Binary files a/doc_build/savefig/ts_filter5.png and /dev/null differ diff --git a/doc_build/savefig/ts_nino.png b/doc_build/savefig/ts_nino.png deleted file mode 100644 index c6d51776..00000000 Binary files a/doc_build/savefig/ts_nino.png and /dev/null differ diff --git a/doc_build/savefig/ts_plot.png b/doc_build/savefig/ts_plot.png deleted file mode 100644 index dd7603bb..00000000 Binary files a/doc_build/savefig/ts_plot.png and /dev/null differ diff --git a/doc_build/savefig/ts_plot2.png b/doc_build/savefig/ts_plot2.png deleted file mode 100644 index d38f5cad..00000000 Binary files a/doc_build/savefig/ts_plot2.png and /dev/null differ diff --git a/doc_build/savefig/ts_plot4.png b/doc_build/savefig/ts_plot4.png deleted file mode 100644 index dd7603bb..00000000 Binary files a/doc_build/savefig/ts_plot4.png and /dev/null differ diff --git a/doc_build/savefig/ts_plot5.png b/doc_build/savefig/ts_plot5.png deleted file mode 100644 index dd7603bb..00000000 Binary files a/doc_build/savefig/ts_plot5.png and /dev/null differ diff --git a/doc_build/savefig/ts_sg.png b/doc_build/savefig/ts_sg.png deleted file mode 100644 index 984df314..00000000 Binary files 
a/doc_build/savefig/ts_sg.png and /dev/null differ diff --git a/doc_build/savefig/wwa_wwz.png b/doc_build/savefig/wwa_wwz.png deleted file mode 100644 index bd42530a..00000000 Binary files a/doc_build/savefig/wwa_wwz.png and /dev/null differ diff --git a/doc_build/ts_plot3.png b/doc_build/ts_plot3.png index 8292aff1..ed6dea84 100644 Binary files a/doc_build/ts_plot3.png and b/doc_build/ts_plot3.png differ diff --git a/doc_build/utils/causality.rst b/doc_build/utils/causality.rst deleted file mode 100644 index 096b85fd..00000000 --- a/doc_build/utils/causality.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.causality \ No newline at end of file diff --git a/doc_build/utils/correlation.rst b/doc_build/utils/correlation.rst deleted file mode 100644 index 94633d1d..00000000 --- a/doc_build/utils/correlation.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.correlation \ No newline at end of file diff --git a/doc_build/utils/decomposition.rst b/doc_build/utils/decomposition.rst deleted file mode 100644 index bd4c41f5..00000000 --- a/doc_build/utils/decomposition.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.decomposition \ No newline at end of file diff --git a/doc_build/utils/filter.rst b/doc_build/utils/filter.rst deleted file mode 100644 index 7be9f3ff..00000000 --- a/doc_build/utils/filter.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.filter \ No newline at end of file diff --git a/doc_build/utils/introduction.rst b/doc_build/utils/introduction.rst index 585181b6..00506d1d 100644 --- a/doc_build/utils/introduction.rst +++ b/doc_build/utils/introduction.rst @@ -14,82 +14,98 @@ to values more appropriate for paleoclimate datasets. Causality """"""""" .. automodule:: pyleoclim.utils.causality - :members: liang_causality, granger_causality + :members: + :ignore-module-all: True + Correlation """"""""""" .. 
automodule:: pyleoclim.utils.correlation - :members: fdr, corr_sig + :members: + :ignore-module-all: True Decomposition """"""""""""" .. automodule:: pyleoclim.utils.decomposition - :members: ssa + :members: + :ignore-module-all: True Filter """""" .. automodule:: pyleoclim.utils.filter - :members: butterworth, savitzky_golay, firwin, lanczos + :members: + :ignore-module-all: True Mapping """"""" .. automodule:: pyleoclim.utils.mapping - :members: map, compute_distance + :members: + :ignore-module-all: True Plotting """""""" .. automodule:: pyleoclim.utils.plotting - :members: set_style, closefig, savefig + :members: + :ignore-module-all: True Spectral """""""" .. automodule:: pyleoclim.utils.spectral - :members: wwz_psd, cwt_psd, mtm, lomb_scargle, welch, periodogram + :members: + :ignore-module-all: True + Tsmodel """"""" .. automodule:: pyleoclim.utils.tsmodel - :members: ar1_sim, ar1_fit, colored_noise, colored_noise_2regimes, gen_ar1_evenly + :members: + :ignore-module-all: True + Wavelet """"""" .. automodule:: pyleoclim.utils.wavelet - :members: cwt, cwt_coherence, wwz, wwz_coherence + :members: + :ignore-module-all: True Tsutils """"""" .. automodule:: pyleoclim.utils.tsutils - :members: simple_stats, bin, interp, gkernel, standardize, ts2segments, annualize, gaussianize, gaussianize_1d, detrend, remove_outliers + :members: + :ignore-module-all: True + Tsbase """""" .. automodule:: pyleoclim.utils.tsbase - :members: clean_ts, dropna, sort_ts, is_evenly_spaced, reduce_duplicated_timestamps + :members: + :ignore-module-all: True + Lipdutils """"""""" -Utilities to manipulate LiPD files and automate data transformation whenever possible. -These functions are used throughout Pyleoclim but are not meant for direct interaction by users. -Also handles integration with the LinkedEarth wiki and the LinkedEarth Ontology. - +.. automodule:: pyleoclim.utils.lipdutils + :members: + :ignore-module-all: True jsonutils """"""""" .. 
automodule:: pyleoclim.utils.jsonutils - :members: PyleoObj_to_json, json_to_PyleoObj, isPyleoclim + :members: + :ignore-module-all: True diff --git a/doc_build/utils/jsonutils.rst b/doc_build/utils/jsonutils.rst deleted file mode 100644 index 1f43604b..00000000 --- a/doc_build/utils/jsonutils.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.jsonutils \ No newline at end of file diff --git a/doc_build/utils/lipdutils.rst b/doc_build/utils/lipdutils.rst deleted file mode 100644 index 5633d5cd..00000000 --- a/doc_build/utils/lipdutils.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.lipdutils \ No newline at end of file diff --git a/doc_build/utils/mapping.rst b/doc_build/utils/mapping.rst deleted file mode 100644 index 564d2136..00000000 --- a/doc_build/utils/mapping.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.mapping \ No newline at end of file diff --git a/doc_build/utils/plotting.rst b/doc_build/utils/plotting.rst deleted file mode 100644 index bb70d564..00000000 --- a/doc_build/utils/plotting.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.plotting \ No newline at end of file diff --git a/doc_build/utils/spectral.rst b/doc_build/utils/spectral.rst deleted file mode 100644 index a2251322..00000000 --- a/doc_build/utils/spectral.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.spectral \ No newline at end of file diff --git a/doc_build/utils/tsbase.rst b/doc_build/utils/tsbase.rst deleted file mode 100644 index 010e929f..00000000 --- a/doc_build/utils/tsbase.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.tsbase \ No newline at end of file diff --git a/doc_build/utils/tsmodel.rst b/doc_build/utils/tsmodel.rst deleted file mode 100644 index f4c44409..00000000 --- a/doc_build/utils/tsmodel.rst +++ /dev/null @@ -1 +0,0 @@ -.. 
automodule:: pyleoclim.utils.tsmodel \ No newline at end of file diff --git a/doc_build/utils/tsutils.rst b/doc_build/utils/tsutils.rst deleted file mode 100644 index 16d564de..00000000 --- a/doc_build/utils/tsutils.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.tsutils \ No newline at end of file diff --git a/doc_build/utils/wavelet.rst b/doc_build/utils/wavelet.rst deleted file mode 100644 index 2757639f..00000000 --- a/doc_build/utils/wavelet.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: pyleoclim.utils.wavelet \ No newline at end of file diff --git a/environment.yml b/environment.yml index 790a7dd6..376b3b4b 100644 --- a/environment.yml +++ b/environment.yml @@ -12,8 +12,9 @@ dependencies: - climlab=0.7.13 - numpy=1.22.0 - pip - - python=3.9.13 + - python=3.10 - libstdcxx-ng=12.1.0 + - pytest - pip: - pangaeapy - - pyleoclim==0.9.1 + - git+https://github.com/LinkedEarth/Pyleoclim_util.git@master diff --git a/pyleoclim/core/coherence.py b/pyleoclim/core/coherence.py index e7b1318b..400fdcbb 100644 --- a/pyleoclim/core/coherence.py +++ b/pyleoclim/core/coherence.py @@ -120,29 +120,52 @@ def plot(self, var='wtc', xlabel=None, ylabel=None, title='auto', figsize=[10, 8 Parameters ---------- + var : str {'wtc', 'xwt'} + variable to be plotted as color field. Default: 'wtc', the wavelet transform coherency. 'xwt' plots the cross-wavelet transform instead. + xlabel : str, optional + x-axis label. The default is None. + ylabel : str, optional + y-axis label. The default is None. + title : str, optional + Title of the plot. The default is 'auto', where it is made from object metadata. To mute, pass title = None. + figsize : list, optional + Figure size. The default is [10, 8]. + ylim : list, optional + y-axis limits. The default is None. + xlim : list, optional + x-axis limits. The default is None. + in_scale : bool, optional + + Plots scales instead of frequencies The default is True. + yticks : list, optional + y-ticks label. The default is None. 
+ contourf_style : dict, optional + Arguments for the contour plot. The default is {}. + phase_style : dict, optional + Arguments for the phase arrows. The default is {}. It includes: - 'pt': the default threshold above which phase arrows will be plotted - 'skip_x': the number of points to skip between phase arrows along the x-axis @@ -150,44 +173,66 @@ def plot(self, var='wtc', xlabel=None, ylabel=None, title='auto', figsize=[10, 8 - 'scale': number of data units per arrow length unit (see matplotlib.pyplot.quiver) - 'width': shaft width in arrow units (see matplotlib.pyplot.quiver) - 'color': arrow color (see matplotlib.pyplot.quiver) + cbar_style : dict, optional + Arguments for the color bar. The default is {}. + savefig_settings : dict, optional + The default is {}. the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} + ax : ax, optional + Matplotlib axis on which to return the figure. The default is None. + signif_thresh: float in [0, 1] + Significance threshold. Default is 0.95. If this quantile is not found in the qs field of the Coherence object, the closest quantile will be picked. + signif_clr : str, optional + Color of the significance line. The default is 'white'. + signif_linestyles : str, optional + Style of the significance line. The default is '-'. + signif_linewidths : float, optional + Width of the significance line. The default is 1. + under_clr : str, optional + Color for under 0. The default is 'ivory'. + over_clr : str, optional + Color for over 1. The default is 'black'. + bad_clr : str, optional + Color for missing values. The default is 'dimgray'. 
Returns ------- + fig, ax See also -------- - pyleoclim.core.coherence.Coherence.dashboard + + pyleoclim.core.coherence.Coherence.dashboard : plots a dashboard showing the coherence and the cross-wavelet transform. - pyleoclim.core.series.Series.wavelet_coherence + pyleoclim.core.series.Series.wavelet_coherence : computes the coherence from two timeseries. - matplotlib.pyplot.quiver + matplotlib.pyplot.quiver : quiver plot Examples -------- @@ -434,15 +479,19 @@ def dashboard(self, title=None, figsize=[9,12], phase_style = {}, ---------- title : str, optional + Title of the plot. The default is None. figsize : list, optional + Figure size. The default is [9, 12], as this is an information-rich figure. line_colors : list, optional + Colors for the 2 traces For nomenclature, see https://matplotlib.org/stable/gallery/color/named_colors.html savefig_settings : dict, optional + The default is {}. the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, @@ -450,6 +499,7 @@ def dashboard(self, title=None, figsize=[9,12], phase_style = {}, - "format" can be one of {"pdf", "eps", "png", "ps"} phase_style : dict, optional + Arguments for the phase arrows. The default is {}. 
It includes: - 'pt': the default threshold above which phase arrows will be plotted - 'skip_x': the number of points to skip between phase arrows along the x-axis @@ -459,25 +509,28 @@ def dashboard(self, title=None, figsize=[9,12], phase_style = {}, - 'color': arrow color (see matplotlib.pyplot.quiver) ts_plot_kwargs : dict + arguments to be passed to the timeseries subplot, see pyleoclim.core.series.Series.plot for details wavelet_plot_kwargs : dict + arguments to be passed to the contour subplots (XWT and WTC), [see pyleoclim.core.coherence.Coherence.plot for details] Returns ------- + fig, ax See also -------- - pyleoclim.core.coherence.Coherence.plot + pyleoclim.core.coherence.Coherence.plot : creates a coherence plot - pyleoclim.core.series.Series.wavelet_coherence + pyleoclim.core.series.Series.wavelet_coherence : computes the coherence between two timeseries. - pyleoclim.core.series.Series.plot + pyleoclim.core.series.Series.plot: plots a timeseries - matplotlib.pyplot.quiver + matplotlib.pyplot.quiver: makes a quiver plot Examples -------- @@ -593,23 +646,36 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting Parameters ---------- + number : int, optional + Number of surrogate series to create for significance testing. The default is 200. + method : {'ar1sim'}, optional + Method through which to generate the surrogate series. The default is 'ar1sim'. + seed : int, optional + Fixes the seed for NumPy's random number generator. Useful for reproducibility. The default is None, so fresh, unpredictable entropy will be pulled from the operating system. + qs : list, optional + Significance levels to return. The default is [0.95]. + settings : dict, optional + Parameters for surrogate model. The default is None. + mute_pbar : bool, optional + Mute the progress bar. The default is False. 
Returns ------- + new : pyleoclim.core.coherence.Coherence original Coherence object augmented with significance levels signif_qs, @@ -794,19 +860,25 @@ def phase_stats(self, scales, number=1000, level=0.05): Parameters ---------- + scales : float + scale at which to evaluate the phase angle number : int, optional + number of AR(1) series to create for significance testing. The default is 1000. level : float, optional + significance level against which to gauge sigma and kappa. default: 0.05 Returns ------- + result : dict + contains angle_mean (the mean angle for those scales), sigma (the circular standard deviation), kappa, sigma_lo (alpha-level quantile for sigma) and kappa_hi, the (1-alpha)-level quantile for kappa. @@ -829,6 +901,7 @@ def phase_stats(self, scales, number=1000, level=0.05): References ---------- + [1] Grinsted, A., J. C. Moore, and S. Jevrejeva (2004), Application of the cross wavelet transform and wavelet coherence to geophysical time series, Nonlinear Processes in Geophysics, 11, 561–566. diff --git a/pyleoclim/core/corr.py b/pyleoclim/core/corr.py index 34b80511..efc01dfc 100644 --- a/pyleoclim/core/corr.py +++ b/pyleoclim/core/corr.py @@ -61,6 +61,7 @@ class Corr: -------- pyleoclim.utils.correlation.corr_sig : Correlation function + pyleoclim.utils.correlation.fdr : FDR function ''' diff --git a/pyleoclim/core/correns.py b/pyleoclim/core/correns.py index 65eac123..427c6623 100644 --- a/pyleoclim/core/correns.py +++ b/pyleoclim/core/correns.py @@ -134,6 +134,7 @@ def plot(self, figsize=[4, 4], title=None, ax=None, savefig_settings=None, hist_ Parameters ---------- + figsize : list, optional The figure size. The default is [4, 4]. @@ -143,10 +144,12 @@ def plot(self, figsize=[4, 4], title=None, ax=None, savefig_settings=None, hist_ Plot title. The default is None. multiple: str, optional + Approach to organizing the 3 different histrograms on the plot. 
possible values: “layer”[default], “dodge”, “stack”, “fill” alpha : float in [0, 1] + transparency parameter for histrogram bars. Default: 0.8 savefig_settings : dict diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py index 296a12a9..c9929024 100644 --- a/pyleoclim/core/ensembleseries.py +++ b/pyleoclim/core/ensembleseries.py @@ -21,6 +21,7 @@ import matplotlib.transforms as transforms import matplotlib as mpl from tqdm import tqdm +import warnings from scipy.stats.mstats import mquantiles class EnsembleSeries(MultipleSeries): @@ -75,6 +76,65 @@ def make_labels(self): time_header = f'{time_name_str}' return time_header, value_header + + def slice(self, timespan): + ''' Selects a limited time span from the object + + Parameters + ---------- + + timespan : tuple or list + The list of time points for slicing, whose length must be even. + When there are n time points, the output Series includes n/2 segments. + For example, if timespan = [a, b], then the sliced output includes one segment [a, b]; + if timespan = [a, b, c, d], then the sliced output includes segment [a, b] and segment [c, d]. + + Returns + ------- + + new : EnsembleSeries + The sliced EnsembleSeries object. + + Examples + -------- + + slice the SOI from 1972 to 1998 + + .. 
ipython:: python + :okwarning: + :okexcept: + + nn = 20 # number of noise realizations + nt = 200 + series_list = [] + + time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=2.0) + + ts = pyleo.Series(time=time, value = signal).standardize() + noise = np.random.randn(nt,nn) + + for idx in range(nn): # noise + ts = pyleo.Series(time=time, value=ts.value+5*noise[:,idx]) + series_list.append(ts) + + ts_ens = pyleo.EnsembleSeries(series_list) + + @savefig ts_ens_plot_orig.png + fig, ax = ts_ens.plot_envelope(curve_lw=1.5) + + @savefig ts_ens_plot_trunc.png + fig, ax = ts_ens.slice([100, 199]).plot_envelope(curve_lw=1.5) + pyleo.closefig(fig) + ''' + new = self.copy() + + for idx, ts in enumerate(self.series_list): + tsc = ts.slice(timespan) + new.series_list[idx] = tsc + + return new + + def quantiles(self, qs=[0.05, 0.5, 0.95]): '''Calculate quantiles of an EnsembleSeries object @@ -91,7 +151,7 @@ def quantiles(self, qs=[0.05, 0.5, 0.95]): Returns ------- - ens_qs : pyleoclim.EnsembleSeries + ens_qs : EnsembleSeries EnsembleSeries object containing empirical quantiles of original @@ -127,7 +187,8 @@ def quantiles(self, qs=[0.05, 0.5, 0.95]): vals.append(ts.value) vals = np.array(vals) - ens_qs = mquantiles(vals, qs, axis=0) + # ens_qs = mquantiles(vals, qs, axis=0) + ens_qs = np.nanquantile(vals, qs, axis=0) ts_list = [] for i, quant in enumerate(ens_qs): @@ -147,7 +208,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, fdr Parameters ---------- - target : pyleoclim.Series or pyleoclim.EnsembleSeries + target : Series or EnsembleSeries A pyleoclim Series object or EnsembleSeries object. 
When the target is also an EnsembleSeries object, then the calculation of correlation is performed in a one-to-one sense, @@ -192,7 +253,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, fdr Returns ------- - corr_ens : pyleoclim.CorrEns + corr_ens : CorrEns The resulting object, see pyleoclim.CorrEns @@ -426,7 +487,7 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num np.random.seed(seed) nts = np.size(self.series_list) - random_draw_idx = np.random.choice(nts, num_traces) + random_draw_idx = np.random.choice(nts, num_traces, replace=False) for idx in random_draw_idx: self.series_list[idx].plot(xlabel=xlabel, ylabel=ylabel, zorder=99, linewidth=lw, @@ -598,12 +659,12 @@ def plot_envelope(self, figsize=[10, 4], qs=[0.025, 0.25, 0.5, 0.75, 0.975], # plot outer envelope ax.fill_between( time, ts_qs.series_list[0].value, ts_qs.series_list[4].value, - color=shade_clr, alpha=shade_alpha, edgecolor=shade_clr, label=outer_shade_label, + color=shade_clr, alpha=shade_alpha, edgecolor=shade_clr, label=outer_shade_label ) # plot inner envelope on top ax.fill_between( time, ts_qs.series_list[1].value, ts_qs.series_list[3].value, - color=shade_clr, alpha=2*shade_alpha, edgecolor=shade_clr, label=inner_shade_label, + color=shade_clr, alpha=2*shade_alpha, edgecolor=shade_clr, label=inner_shade_label ) # plot the median @@ -855,7 +916,7 @@ def stackplot(self, figsize=[5, 15], savefig_settings=None, xlim=None, fill_bet return ax - def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, + def histplot(self, figsize=[10, 4], title=None, savefig_settings=None, ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs): """ Plots the distribution of the timeseries across ensembles @@ -927,8 +988,8 @@ def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, ts_ens = pyleo.EnsembleSeries(series_list) - @savefig ens_distplot.png - fig, ax = ts_ens.distplot() + @savefig ens_histplot.png + 
fig, ax = ts_ens.histplot() pyleo.closefig(fig) """ @@ -963,4 +1024,3 @@ def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, return fig, ax else: return ax - diff --git a/pyleoclim/core/lipd.py b/pyleoclim/core/lipd.py index 31a3fa74..9d62fa2c 100644 --- a/pyleoclim/core/lipd.py +++ b/pyleoclim/core/lipd.py @@ -196,7 +196,7 @@ def extract(self,dataSetName): Returns ------- - new : pyleoclim.Lipd + new : Lipd A new object corresponding to a particular dataset @@ -321,6 +321,7 @@ def mapAllArchive(self, projection = 'Robinson', proj_default = True, Parameters ---------- + projection : str, optional The projection to use. The default is 'Robinson'. @@ -389,6 +390,7 @@ def mapAllArchive(self, projection = 'Robinson', proj_default = True, Returns ------- + res : tuple or fig The figure and axis if asked. diff --git a/pyleoclim/core/lipdseries.py b/pyleoclim/core/lipdseries.py index c2d7eba3..437d0ef9 100644 --- a/pyleoclim/core/lipdseries.py +++ b/pyleoclim/core/lipdseries.py @@ -156,6 +156,7 @@ def copy(self): Returns ------- + object : pyleoclim.LipdSeries New object with data copied from original @@ -194,7 +195,7 @@ def chronEnsembleToPaleo(self, D, number=None, chronNumber=None, modelNumber=Non Returns ------- - ens : pyleoclim.EnsembleSeries + ens : EnsembleSeries An EnsembleSeries object with each series representing a possible realization of the age model See also @@ -293,6 +294,7 @@ def map(self, projection='Orthographic', proj_default=True, Parameters ---------- + projection : str, optional The projection to use. The default is 'Robinson'. 
@@ -618,13 +620,14 @@ def getMetadata(self): return res - def dashboard(self, figsize=[11, 8], plt_kwargs=None, distplt_kwargs=None, spectral_kwargs=None, + def dashboard(self, figsize=[11, 8], plt_kwargs=None, histplt_kwargs=None, spectral_kwargs=None, spectralsignif_kwargs=None, spectralfig_kwargs=None, map_kwargs=None, metadata=True, savefig_settings=None, ensemble=False, D=None): ''' Parameters ---------- + figsize : list or tuple, optional Figure size. The default is [11,8]. @@ -633,9 +636,9 @@ def dashboard(self, figsize=[11, 8], plt_kwargs=None, distplt_kwargs=None, spect Optional arguments for the timeseries plot. See Series.plot() or EnsembleSeries.plot_envelope(). The default is None. - distplt_kwargs : dict, optional + histplt_kwargs : dict, optional - Optional arguments for the distribution plot. See Series.distplot() or EnsembleSeries.plot_displot(). The default is None. + Optional arguments for the distribution plot. See Series.histplot() or EnsembleSeries.plot_distplot(). The default is None. spectral_kwargs : dict, optional @@ -675,6 +678,7 @@ def dashboard(self, figsize=[11, 8], plt_kwargs=None, distplt_kwargs=None, spect Returns ------- + fig : matplotlib.figure The figure @@ -690,23 +694,23 @@ def dashboard(self, figsize=[11, 8], plt_kwargs=None, distplt_kwargs=None, spect pyleoclim.core.ensembleseries.EnsembleSeries.plot_envelope: Envelope plots for an ensemble - pyleoclim.core.series.Series.distplot : plot a distribution of the timeseries + pyleoclim.core.series.Series.histplot : plot a distribution of the timeseries - pyleoclim.core.ensembleseries.EnsembleSeries.distplot : plot a distribution of the timeseries across ensembles + pyleoclim.core.ensembleseries.EnsembleSeries.histplot : plot a distribution of the timeseries across ensembles pyleoclim.core.series.Series.spectral : spectral analysis method. pyleoclim.core.multipleseries.MultipleSeries.spectral : spectral analysis method for multiple series. 
- pyleoclim.core.PSD.PSD.signif_test : significance test for timeseries analysis + pyleoclim.core.psds.PSD.signif_test : significance test for timeseries analysis - pyleoclim.core.PSD.PSD.plot : plot power spectrum + pyleoclim.core.psds.PSD.plot : plot power spectrum - pyleoclim.core.MulitplePSD.MulitplePSD.plot : plot envelope of power spectrum + pyleoclim.core.psds.MultiplePSD.plot : plot envelope of power spectrum pyleoclim.core.lipdseries.LipdSeries.map : map location of dataset - pyleolim.core.lipdseries.LipdSeries.getMetadata : get relevant metadata from the timeseries object + pyleoclim.core.lipdseries.LipdSeries.getMetadata : get relevant metadata from the timeseries object pyleoclim.utils.mapping.map : Underlying mapping function for Pyleoclim @@ -771,18 +775,18 @@ def dashboard(self, figsize=[11, 8], plt_kwargs=None, distplt_kwargs=None, spect ymin, ymax = ax['ts'].get_ylim() # plot the distplot - distplt_kwargs = {} if distplt_kwargs is None else distplt_kwargs.copy() + histplt_kwargs = {} if histplt_kwargs is None else histplt_kwargs.copy() ax['dts'] = fig.add_subplot(gs[0, -1:]) - distplt_kwargs.update({'ax': ax['dts']}) - distplt_kwargs.update({'ylabel': 'Counts'}) - distplt_kwargs.update({'vertical': True}) - if 'color' not in distplt_kwargs.keys(): + histplt_kwargs.update({'ax': ax['dts']}) + histplt_kwargs.update({'ylabel': 'Counts'}) + histplt_kwargs.update({'vertical': True}) + if 'color' not in histplt_kwargs.keys(): archiveType = lipdutils.LipdToOntology(res['archiveType']).lower().replace(" ", "") - distplt_kwargs.update({'color': self.plot_default[archiveType][0]}) + histplt_kwargs.update({'color': self.plot_default[archiveType][0]}) if ensemble == False: - ax['dts'] = self.distplot(**distplt_kwargs) + ax['dts'] = self.histplot(**histplt_kwargs) elif ensemble == True: - ax['dts'] = ensc.distplot(**distplt_kwargs) + ax['dts'] = ensc.histplot(**histplt_kwargs) ax['dts'].set_ylim([ymin, ymax]) ax['dts'].set_yticklabels([])
ax['dts'].set_ylabel('') @@ -953,6 +957,7 @@ def mapNearRecord(self, D, n=5, radius=None, sameArchive=False, Parameters ---------- + D : pyleoclim.Lipd A pyleoclim LiPD object diff --git a/pyleoclim/core/multipleseries.py b/pyleoclim/core/multipleseries.py index 6271fab0..54a262a2 100644 --- a/pyleoclim/core/multipleseries.py +++ b/pyleoclim/core/multipleseries.py @@ -12,13 +12,13 @@ from ..core.psds import MultiplePSD from ..core.spatialdecomp import SpatialDecomp -import matplotlib.pyplot as plt import numpy as np from copy import deepcopy from matplotlib.ticker import FormatStrFormatter import matplotlib.transforms as transforms import matplotlib as mpl +import matplotlib.pyplot as plt from tqdm import tqdm from scipy import stats @@ -44,7 +44,7 @@ class MultipleSeries: If None, then no conversion will be applied; Otherwise, the time unit of every series in the list will be converted to the target. - name : str + name : str name of the collection of timeseries (e.g. 'PAGES 2k ice cores') @@ -80,7 +80,7 @@ def __init__(self, series_list, time_unit=None, name=None): self.series_list = new_ts_list def convert_time_unit(self, time_unit='years'): - ''' Convert the time unit of the timeseries + ''' Convert the time units of the object Parameters ---------- @@ -166,7 +166,7 @@ def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kw Returns ------- - ms : pyleoclim.core.multipleseries.MultipleSeries + ms : MultipleSeries See also -------- @@ -226,7 +226,7 @@ def append(self,ts): Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The augmented object, comprising the old one plus `ts` @@ -267,7 +267,7 @@ def copy(self): Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The copied version of the pyleoclim.MultipleSeries object @@ -292,6 +292,51 @@ def copy(self): ms_copy = ms.copy() ''' return deepcopy(self) + + def flip(self, axis='value'): + ''' + Flips the Series along one or both axes + + Parameters + 
---------- + axis : str, optional + The axis along which the Series will be flipped. The default is 'value'. + Other acceptable options are 'time' or 'both'. + TODO: enable time flipping after paleopandas is released + + Returns + ------- + ms : MultipleSeries + The flipped object + + Examples + -------- + + .. ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + url = 'http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004' + data = pyleo.Lipd(usr_path = url) + tslist = data.to_LipdSeriesList() + tslist = tslist[2:] # drop the first two series which only concerns age and depth + ms = pyleo.MultipleSeries(tslist) + + @savefig ms_flip.png + fig, ax = ms.flip().stackplot() + pyleo.closefig(fig) + + + Note that labels have been updated to reflect the flip + ''' + + ms=self.copy() + for idx,item in enumerate(ms.series_list): + s=item.flip(axis=axis, keep_log=False) + ms.series_list[idx]=s + + return ms def standardize(self): '''Standardize each series object in a collection @@ -299,7 +344,7 @@ def standardize(self): Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The standardized pyleoclim.MultipleSeries object @@ -338,7 +383,7 @@ def increments(self, step_style='median', verbose=False): Parameters ---------- - step_style : str; {"median","mean,"mode","max"} + step_style : str; {'median','mean','mode','max'} Method to obtain a representative step if x is not evenly spaced. Valid entries: 'median' [default], 'mean', 'mode' or 'max'. @@ -399,13 +444,15 @@ def increments(self, step_style='median', verbose=False): def common_time(self, method='interp', step = None, start = None, stop = None, step_style = None, **kwargs): ''' Aligns the time axes of a MultipleSeries object - The alignment is achieved via binning, interpolation., or Gaussian kernel. Alignment is critical for workflows + The alignment is achieved via binning, interpolation, or Gaussian kernel. 
Alignment is critical for workflows that need to assume a common time axis for the group of series under consideration. The common time axis is characterized by the following parameters: start : the latest start date of the bunch (maximun of the minima) + stop : the earliest stop date of the bunch (minimum of the maxima) + step : The representative spacing between consecutive values Optional arguments for binning, Gaussian kernel (gkernel) interpolation are those of the underling functions. @@ -443,7 +490,7 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The MultipleSeries objects with all series aligned to the same time axis. @@ -581,6 +628,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, Parameters ---------- + target : pyleoclim.Series, optional The Series against which to take the correlation. If the target Series is not specified, then the 1st member of MultipleSeries will be used as the target @@ -621,7 +669,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, Returns ------- - corr : pyleoclim.CorrEns.CorrEns + corr : CorrEns the result object @@ -801,7 +849,7 @@ def pca(self,weights=None,missing='fill-em',tol_em=5e-03, max_em_iter=100,**pca_ Returns ------- - res: pyleoclim.SpatialDecomp + res: SpatialDecomp Resulting pyleoclim.SpatialDecomp object @@ -940,7 +988,9 @@ def bin(self, **kwargs): The common time axis is characterized by the following parameters: start : the latest start date of the bunch (maximin of the minima) + stop : the earliest stop date of the bunch (minimum of the maxima) + step : The representative spacing between consecutive values (mean of the median spacings) This is a special case of the common_time function. 
@@ -955,7 +1005,7 @@ def bin(self, **kwargs): Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The MultipleSeries objects with all series aligned to the same time axis. @@ -999,7 +1049,9 @@ def gkernel(self, **kwargs): The common time axis is characterized by the following parameters: start : the latest start date of the bunch (maximin of the minima) + stop : the earliest stop date of the bunch (minimum of the maxima) + step : The representative spacing between consecutive values (mean of the median spacings) This is a special case of the common_time function. @@ -1013,7 +1065,8 @@ def gkernel(self, **kwargs): Returns ------- - ms : pyleoclim.MultipleSeries + + ms : MultipleSeries The MultipleSeries objects with all series aligned to the same time axis. See also @@ -1056,7 +1109,9 @@ def interp(self, **kwargs): The common time axis is characterized by the following parameters: start : the latest start date of the bunch (maximin of the minima) + stop : the earliest stop date of the bunch (minimum of the maxima) + step : The representative spacing between consecutive values (mean of the median spacings) This is a special case of the common_time function. @@ -1068,7 +1123,8 @@ def interp(self, **kwargs): Returns ------- - ms : pyleoclim.MultipleSeries + + ms : MultipleSeries The MultipleSeries objects with all series aligned to the same time axis. @@ -1115,8 +1171,8 @@ def detrend(self,method='emd',**kwargs): Options include: * linear: the result of a linear least-squares fit to y is subtracted from y. * constant: only the mean of data is subtrated. - * "savitzky-golay", y is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y. - * "emd" (default): Empirical mode decomposition. The last mode is assumed to be the trend and removed from the series + * 'savitzky-golay', y is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y. 
+ * 'emd' (default): Empirical mode decomposition. The last mode is assumed to be the trend and removed from the series **kwargs : dict Relevant arguments for each of the methods. @@ -1124,7 +1180,7 @@ def detrend(self,method='emd',**kwargs): Returns ------- - ms : pyleoclim.MultipleSeries + ms : MultipleSeries The detrended timeseries @@ -1138,7 +1194,7 @@ def detrend(self,method='emd',**kwargs): ms=self.copy() for idx,item in enumerate(ms.series_list): s=item.copy() - v_mod=tsutils.detrend(item.value,x=item.time,method=method,**kwargs) + v_mod, _=tsutils.detrend(item.value,x=item.time,method=method,**kwargs) s.value=v_mod ms.series_list[idx]=s return ms @@ -1181,7 +1237,7 @@ def spectral(self, method='lomb_scargle', settings=None, mute_pbar=False, freq_m Returns ------- - psd : pyleoclim.MultiplePSD + psd : MultiplePSD A Multiple PSD object @@ -1264,10 +1320,11 @@ def wavelet(self, method='cwt', settings={}, freq_method='log', freq_kwargs=None method : str {wwz, cwt} - cwt - the continuous wavelet transform (as per Torrence and Compo [1998]) + - cwt - the continuous wavelet transform (as per Torrence and Compo [1998]) is appropriate only for evenly-spaced series. - wwz - the weighted wavelet Z-transform (as per Foster [1996]) + - wwz - the weighted wavelet Z-transform (as per Foster [1996]) is appropriate for both evenly and unevenly-spaced series. + Default is cwt, returning an error if the Series is unevenly-spaced. settings : dict, optional @@ -1295,7 +1352,7 @@ def wavelet(self, method='cwt', settings={}, freq_method='log', freq_kwargs=None Returns ------- - scals : pyleoclim.MultipleScalograms + scals : MultipleScalograms A Multiple Scalogram object @@ -1366,11 +1423,11 @@ def plot(self, figsize=[10, 4], marker : str, optional - marker type. The default is None. + Marker type. The default is None. markersize : float, optional - marker size. The default is None. + Marker size. The default is None. 
linestyle : str, optional @@ -1410,7 +1467,7 @@ def plot(self, figsize=[10, 4], legend : bool, optional - Wether the show the legend. The default is True. + Whether to show the legend. The default is True. plot_kwargs : dict, optional @@ -1529,13 +1586,14 @@ def plot(self, figsize=[10, 4], return ax def stackplot(self, figsize=None, savefig_settings=None, xlim=None, fill_between_alpha=0.2, colors=None, cmap='tab10', norm=None, labels='auto', - spine_lw=1.5, grid_lw=0.5, font_scale=0.8, label_x_loc=-0.15, v_shift_factor=3/4, linewidth=1.5, plot_kwargs=None): + spine_lw=1.5, grid_lw=0.5, label_x_loc=-0.15, v_shift_factor=3/4, linewidth=1.5, plot_kwargs=None): ''' Stack plot of multiple series Note that the plotting style is uniquely designed for this one and cannot be properly reset with `pyleoclim.set_style()`. Parameters ---------- + figsize : list Size of the figure. @@ -1586,10 +1644,6 @@ def stackplot(self, figsize=None, savefig_settings=None, xlim=None, fill_betwee The linewidth for the gridlines. - font_scale : float - - The scale for the font sizes. Default is 0.8. - label_x_loc : float The x location for the label of each curve. @@ -1606,8 +1660,9 @@ def stackplot(self, figsize=None, savefig_settings=None, xlim=None, fill_betwee plot_kwargs: dict or list of dict Arguments to further customize the plot from matplotlib.pyplot.plot. - Dictionary: Arguments will be applied to all lines in the stackplots - List of dictionary: Allows to customize one line at a time. + + - Dictionary: Arguments will be applied to all lines in the stackplots + - List of dictionary: Allows to customize one line at a time.
Returns ------- @@ -1687,8 +1742,6 @@ def stackplot(self, figsize=None, savefig_settings=None, xlim=None, fill_betwee pyleo.closefig(fig) #Optional figure close after plotting ''' - current_style = deepcopy(mpl.rcParams) - plotting.set_style('journal', font_scale=font_scale) savefig_settings = {} if savefig_settings is None else savefig_settings.copy() n_ts = len(self.series_list) @@ -1814,13 +1867,183 @@ def stackplot(self, figsize=None, savefig_settings=None, xlim=None, fill_betwee for x in xt: ax[n_ts].axvline(x=x, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1) + if 'fig' in locals(): + if 'path' in savefig_settings: + plotting.savefig(fig, settings=savefig_settings) + return fig, ax + else: + return ax + + def stripes(self, ref_period=None, figsize=None, savefig_settings=None, + LIM = 2.8, thickness=1.0, labels='auto', label_color = 'gray', + common_time_kwargs=None, xlim=None, font_scale=0.8, x_offset = 0.05): + ''' + Represents a MultipleSeries object as a quilt of Ed Hawkins' "stripes" patterns + + To ensure comparability, constituent series are placed on a common time axis, using + `MultipleSeries.common_time()`. To ensure consistent scaling, all series are Gaussianized + prior to plotting. + + Credit: https://showyourstripes.info/, + Implementation: https://matplotlib.org/matplotblog/posts/warming-stripes/ + + Parameters + ---------- + + ref_period : list or tuple, optional + dates of the reference period, in the form "(first, last)". + The default is None, which will pick the beginning and end of the common time axis. + + LIM : float + scaling factor for color saturation. default is 2.8. + The higher the LIM, the more compressed the color range (milder hues) + + thickness : float, optional + vertical thickness of the stripe. The default is 1.0 + + figsize : list + + Size of the figure.
+ + savefig_settings : dictionary + + the dictionary of arguments for plt.savefig(); some notes below: + + - 'path' must be specified; it can be any existing or non-existing path, + with or without a suffix; if the suffix is not given in 'path', it will follow 'format' + - 'format' can be one of {"pdf", 'eps', 'png', 'ps'} The default is None. + + xlim : list + The x-axis limit. + + x_offset : float + value controlling the horizontal offset between stripes and labels (default = 0.05) + + labels: None, 'auto' or list + + If None, doesn't add labels to the subplots + + If 'auto', uses the labels passed during the creation of pyleoclim.Series + + If list, pass a list of strings for each labels. + Default is 'auto' + + common_time_kwargs : dict + Optional arguments for common_time() + + font_scale : float + The scale for the font sizes. Default is 0.8. + + Returns + ------- + + fig : matplotlib.figure + the figure object from matplotlib + See [matplotlib.pyplot.figure](https://matplotlib.org/stable/api/figure_api.html) for details. + + ax : matplotlib.axis + the axis object from matplotlib + See [matplotlib.axes](https://matplotlib.org/stable/api/axes_api.html) for details. + + See also + -------- + + pyleoclim.core.multipleseries.MultipleSeries.common_time : aligns the time axes of a MultipleSeries object + + pyleoclim.utils.plotting.savefig : saving a figure in Pyleoclim + + pyleoclim.core.series.Series.stripes : stripes representation in Pyleoclim + + pyleoclim.utils.tsutils.gaussianize : mapping to a standard Normal distribution + + Examples + -------- + + ..
ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + url = 'http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004' + d = pyleo.Lipd(usr_path = url) + tslist = d.to_LipdSeriesList() + tslist = tslist[2:] # drop the first two series which only concerns age and depth + ms = pyleo.MultipleSeries(tslist) + @savefig md76_stripes.png + fig, ax = ms.stripes() + pyleo.closefig(fig) + + The default style has rather thick bands, intense colors, and too many stripes. + The first issue can be solved by passing a figsize tuple; the second by increasing the LIM parameter; + the third by passing a step of 0.5 (500y) to common_time(). Finally, the + labels are too close to the edge of the plot, which can be adjusted with x_offset, like so: + + .. ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + url = 'http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004' + d = pyleo.Lipd(usr_path = url) + tslist = d.to_LipdSeriesList() + tslist = tslist[2:] # drop the first two series which only concerns age and depth + ms = pyleo.MultipleSeries(tslist) + @savefig md76_stripes2.png + fig, ax = ms.stripes(common_time_kwargs={'step': 0.5}, x_offset = 200, + LIM=4, figsize=[8,3]) + pyleo.closefig(fig) + + ''' + current_style = deepcopy(mpl.rcParams) + plotting.set_style('journal', font_scale=font_scale) + savefig_settings = {} if savefig_settings is None else savefig_settings.copy() + common_time_kwargs = {} if common_time_kwargs is None else common_time_kwargs.copy() + + # put on common timescale + msc = self.common_time(**common_time_kwargs) + + ts0 = msc.series_list[0] + time = ts0.time + # generate default axis labels + time_label, _ = ts0.make_labels() + + if ref_period is None: + ref_period = [time.min(), time.max()] + + n_ts = len(msc.series_list) + last = n_ts-1 + + if n_ts < 2: + raise ValueError("There is only one series in this object. 
Please use the Series class instead") + + if type(labels)==list: + if len(labels) != n_ts: + raise ValueError("The length of the label list should match the number of timeseries to be plotted") + + fig, axs = plt.subplots(n_ts, 1, sharex=True, figsize=figsize, layout = 'tight') + ax = axs.flatten() + + if xlim is None: + xlim = [time.min(), time.max()] + + for idx in range(n_ts-1): # loop over series + ts = msc.series_list[idx].gaussianize() + ts.stripes(ref_period, LIM = LIM, label_color = label_color, + ax=ax[idx], x_offset=x_offset) + + # handle bottom plot + ts = msc.series_list[last].gaussianize() + ts.stripes(ref_period, LIM = LIM, label_color = label_color, + ax=ax[last], x_offset=x_offset, show_xaxis=True) + ax[last].set_xlabel(time_label) + ax[last].set_xlim(xlim) + if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) mpl.rcParams.update(current_style) return fig, ax else: - plotting.showfig(fig) # reset the plotting style mpl.rcParams.update(current_style) return ax diff --git a/pyleoclim/core/psds.py b/pyleoclim/core/psds.py index 8b226400..8d598be6 100644 --- a/pyleoclim/core/psds.py +++ b/pyleoclim/core/psds.py @@ -10,6 +10,7 @@ import numpy as np from tabulate import tabulate from copy import deepcopy +from tqdm import tqdm import warnings from matplotlib.ticker import ScalarFormatter, FormatStrFormatter @@ -190,7 +191,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], Returns ------- - new : pyleoclim.PSD + new : pyleoclim.core.psds.PSD New PSD object with appropriate significance test @@ -394,7 +395,7 @@ def beta_est(self, fmin=None, fmax=None, logf_binning_step='max', verbose=False) Returns ------- - new : pyleoclim.PSD + new : pyleoclim.core.psds.PSD New PSD object with the estimated scaling slope information, which is stored as a dictionary that includes: - beta: the scaling factor - std_err: the one standard deviation error of the scaling factor @@ -450,6 +451,67 @@ def 
beta_est(self, fmin=None, fmax=None, logf_binning_step='max', verbose=False) new.beta_est_res = res_dict return new + def anti_alias(self, avgs=2): + ''' Apply the anti-aliasing filter + + Parameters + ---------- + + avgs : int + flag for whether spectrum is derived from instantaneous point measurements (avgs<>1) + OR from measurements averaged over each sampling interval (avgs==1) + + Returns + ------- + + new : pyleoclim.core.psds.PSD + New PSD object with the spectral aliasing effect alleviated. + + Examples + -------- + + Generate colored noise with scaling exponent equals to unity, and test the impact of anti-aliasing filter + + .. ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + + t, v = pyleo.utils.tsmodel.gen_ts('colored_noise', alpha=1, m=1e5) # m=1e5 leads to aliasing + ts = pyleo.Series(time=t, value=v, label='colored noise') + + # without the anti-aliasing filter + @savefig color_noise_no_anti_alias.png + fig, ax = ts.spectral(method='mtm').beta_est().plot() + + # with the anti-aliasing filter + @savefig color_noise_anti_alias.png + fig, ax = ts.spectral(method='mtm').anti_alias().beta_est().plot() + + References + ---------- + + Kirchner, J. W. Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies. + Phys Rev E Stat Nonlin Soft Matter Phys 71, 66110 (2005). 
+ + See also + -------- + + pyleoclim.utils.wavelet.AliasFilter.alias_filter : anti-aliasing filter + ''' + new = self.copy() + + dt = np.median(np.diff(self.timeseries.time)) + f_sampling = 1/dt + psd_copy = self.amplitude[1:] + freq_copy = self.frequency[1:] + alpha, filtered_pwr, model_pwer, aliased_pwr = waveutils.AliasFilter().alias_filter( + freq_copy, psd_copy, f_sampling, f_sampling*1e3, np.min(self.frequency), avgs) + + new.amplitude[1:] = np.copy(filtered_pwr) + return new + def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel='PSD', title=None, marker=None, markersize=None, color=None, linestyle=None, linewidth=None, transpose=False, xlim=None, ylim=None, figsize=[10, 4], savefig_settings=None, ax=None, @@ -462,6 +524,7 @@ def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel=' Parameters ---------- + in_loglog : bool; {True, False}, optional Plot on loglog axis. The default is True. @@ -585,6 +648,7 @@ def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel=' Returns ------- + fig, ax Examples @@ -913,7 +977,7 @@ def beta_est(self, fmin=None, fmax=None, logf_binning_step='max', verbose=False) Returns ------- - new : pyleoclim.MultiplePSD + new : pyleoclim.core.psds.MultiplePSD New MultiplePSD object with the estimated scaling slope information, which is stored as a dictionary that includes: - beta: the scaling factor @@ -1038,6 +1102,7 @@ def plot(self, figsize=[10, 4], in_loglog=True, in_period=True, xlabel=None, yla Returns ------- + fig : matplotlib.pyplot.figure ax : matplotlib.pyplot.axis @@ -1118,6 +1183,50 @@ def plot(self, figsize=[10, 4], in_loglog=True, in_period=True, xlabel=None, yla else: return ax + def anti_alias(self, avgs=2, mute_pbar=False): + ''' Apply the anti-aliasing filter + + Parameters + ---------- + + avgs : int + + flag for whether spectrum is derived from instantaneous point measurements (avgs<>1) + OR from measurements averaged over each sampling 
interval (avgs==1) + + mute_pbar : bool; {True,False} + + If True, the progressbar will be muted. Default is False. + + Returns + ------- + + new : pyleoclim.core.psds.MultiplePSD + New MultiplePSD object with the spectral aliasing effect alleviated. + + + References + ---------- + + Kirchner, J. W. Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies. + Phys Rev E Stat Nonlin Soft Matter Phys 71, 66110 (2005). + + See also + -------- + + pyleoclim.utils.wavelet.AliasFilter.alias_filter : anti-aliasing filter + ''' + psd_aa_list = [] + for psd_obj in tqdm(self.psd_list, total=len(self.psd_list), disable=mute_pbar, desc='Applying the anti-alias filter'): + psd_aa = psd_obj.anti_alias(avgs=avgs) + psd_aa_list.append(psd_aa) + + new = self.copy() + new.psd_list = psd_aa_list + return new + + + def plot_envelope(self, figsize=[10, 4], qs=[0.025, 0.5, 0.975], in_loglog=True, in_period=True, xlabel=None, ylabel='PSD', title=None, xlim=None, ylim=None, savefig_settings=None, ax=None, xticks=None, yticks=None, plot_legend=True, diff --git a/pyleoclim/core/scalograms.py b/pyleoclim/core/scalograms.py index 4029555a..10fb343c 100644 --- a/pyleoclim/core/scalograms.py +++ b/pyleoclim/core/scalograms.py @@ -1,7 +1,7 @@ # It is unclear why the documentation for these two modules does not build automatically using automodule. 
It therefore had to be built using autoclass -from ..utils import plotting, lipdutils +from ..utils import plotting, lipdutils, tsutils from ..utils import wavelet as waveutils import matplotlib.pyplot as plt @@ -149,6 +149,7 @@ def __init__(self, frequency, scale, time, amplitude, coi=None, label=None, Neff self.freq_method = freq_method self.freq_kwargs = freq_kwargs self.signif_scals = signif_scals + self.conf = None #if wave_method == 'wwz': if wwz_Neffs is None: self.wwz_Neffs = wwz_Neffs @@ -179,7 +180,7 @@ def copy(self): Returns ------- - scal : pyleoclim.Scalogram + scal : pyleoclim.core.scalograms.Scalogram The copied version of the pyleoclim.Scalogram object @@ -214,7 +215,7 @@ def __str__(self): def plot(self, variable = 'amplitude', in_scale=True, xlabel=None, ylabel=None, title=None, ylim=None, xlim=None, yticks=None, figsize=[10, 8], signif_clr='white', signif_linestyles='-', signif_linewidths=1, - contourf_style={}, cbar_style={}, savefig_settings={}, ax=None, + contourf_style={}, cbar_style={}, plot_cb=True, savefig_settings={}, ax=None, signif_thresh = 0.95): ''' Plot the scalogram @@ -351,25 +352,22 @@ def plot(self, variable = 'amplitude', in_scale=True, xlabel=None, ylabel=None, if variable == 'amplitude': cont = ax.contourf(self.time, y_axis, self.amplitude.T, **contourf_args) - elif variable=='power': + elif variable =='power': cont = ax.contourf(self.time, y_axis, self.amplitude.T**2, **contourf_args) else: raise ValueError('Variable should be either "amplitude" or "power"') ax.set_yscale('log') + # assign filled contour data to the scalogram object + self.conf = cont + # plot colorbar - cbar_args = {'drawedges': False, 'orientation': 'vertical', 'fraction': 0.15, 'pad': 0.05, 'label':variable.capitalize()} - cbar_args.update(cbar_style) - - if 'inset' in cbar_args: - cbar_args.pop('inset') - axins1 = inset_axes(ax, - width="50%", # width = 50% of parent_bbox width - height="5%", # height : 5% - loc='upper right') - cb = plt.colorbar(cont, 
ax =ax, cax=axins1, **cbar_args) - else: - cb = plt.colorbar(cont, ax = ax, **cbar_args) + if plot_cb is True: + cbar_args = {'drawedges': False, 'orientation': 'vertical', 'fraction': 0.15, 'pad': 0.05, + 'label': variable.capitalize()} + cbar_args.update(cbar_style) + cb = plt.colorbar(cont, ax=ax, **cbar_args) + # plot cone of influence if self.coi is not None: @@ -453,7 +451,8 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], Method to use to generate the surrogates. ar1sim uses simulated timeseries with similar persistence. ar1asym represents the theoretical, closed-form solution. The default is ar1sim - number: int + number : int + Number of surrogates to generate for significance analysis based on simulations. The default is 200. @@ -484,7 +483,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], Returns ------- - new : pyleoclim.Scalogram + new : pyleoclim.core.scalograms.Scalogram A new Scalogram object with the significance level @@ -582,8 +581,11 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], raise TypeError('qs should be a list') settings = {} if settings is None else settings.copy() + + ys = tsutils.preprocess(self.timeseries.value, self.timeseries.time, detrend=False, sg_kwargs=None, + gaussianize=self.wave_args['gaussianize'], standardize=self.wave_args['standardize']) - signif_levels=waveutils.tc_wave_signif(self.timeseries.value, + signif_levels=waveutils.tc_wave_signif(ys, self.timeseries.time, self.wave_args['scale'], self.wave_args['mother'], @@ -665,7 +667,7 @@ def quantiles(self, qs=[0.05, 0.5, 0.95]): Returns ------- - scals : pyleoclim.MultipleScalogram + scals : pyleoclim.core.scalograms.MultipleScalogram ''' freq = np.copy(self.scalogram_list[0].frequency) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 2b19e2d0..ea76500f 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -32,6 +32,7 @@ from collections import 
namedtuple from copy import deepcopy import matplotlib.colors as mcolors +import matplotlib.colorbar as mcb import random from matplotlib import gridspec @@ -45,13 +46,13 @@ def dict2namedtuple(d): return tupletype(**d) class Series: - '''The Series class describes the most basic objects in Pyleoclim. + '''The Series class describes the most basic objects in Pyleoclim. A Series is a simple `dictionary `_ that contains 3 things: - + * a series of real-valued numbers; - + * a time axis at which those values were measured/simulated ; - + * optionally, some metadata about both axes, like units, labels and the like. How to create and manipulate such objects is described in a short example below, while `this notebook `_ demonstrates how to apply various Pyleoclim methods to Series objects. @@ -89,13 +90,17 @@ class Series: set to True to remove the NaNs and make time axis strictly prograde with duplicated timestamps reduced by averaging the values Default is True + log : dict + + If keep_log is set to True, then a log of the transformations made to the timeseries will be kept. + verbose : bool If True, will print warning messages if there is any Examples -------- - In this example, we import the Southern Oscillation Index (SOI) into a pandas dataframe and create aSeries object. + In this example, we import the Southern Oscillation Index (SOI) into a pandas dataframe and create a Series object. .. ipython:: python :okwarning: @@ -115,21 +120,30 @@ class Series: ) ts ts.__dict__.keys() - - For a quick look at the values, one may use the `print()` method. We do so below for a short slice of the data so as not to overwhelm the display: - + + For a quick look at the values, one may use the `print()` method. We do so below for a short slice of the data so as not to overwhelm the display: + .. 
ipython:: python :okwarning: :okexcept: - + print(ts.slice([1982,1983])) - + ''' - def __init__(self, time, value, time_name=None, time_unit=None, value_name=None, value_unit=None, label=None, mean=None, clean_ts=True, verbose=False): + def __init__(self, time, value, time_name=None, time_unit=None, value_name=None, + value_unit=None, label=None, mean=None, clean_ts=True, log=None, verbose=False): + # TODO: remove mean argument once it's safe to do so + if log is None: + self.log = () + nlog = -1 + else: + self.log = log + nlog = len(log) - if clean_ts==True: + if clean_ts == True: value, time = tsbase.clean_ts(np.array(value), np.array(time), verbose=verbose) + self.log = self.log + ({nlog+1: 'clean_ts', 'applied': clean_ts, 'verbose': verbose},) self.time = np.array(time) self.value = np.array(value) @@ -138,15 +152,15 @@ def __init__(self, time, value, time_name=None, time_unit=None, value_name=None, self.value_name = value_name self.value_unit = value_unit self.label = label - self.clean_ts=clean_ts - self.verbose=verbose - + #self.clean_ts=clean_ts + #self.verbose=verbose + if mean is None: self.mean=np.mean(self.value) else: self.mean = mean - def convert_time_unit(self, time_unit='years'): + def convert_time_unit(self, time_unit='years', keep_log=False): ''' Convert the time unit of the Series object Parameters @@ -161,6 +175,9 @@ def convert_time_unit(self, time_unit='years'): 'my BP', 'myr BP', 'myrs BP', 'ma BP', 'ma', } + keep_log : Boolean + if True, adds this step and its parameter to the series log. + Examples -------- .. 
ipython:: python @@ -171,22 +188,22 @@ def convert_time_unit(self, time_unit='years'): import pandas as pd data = pd.read_csv( 'https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/soi_data.csv', - skiprows=0, header=1 - ) + skiprows=0, header=1) time = data.iloc[:,1] value = data.iloc[:,2] ts = pyleo.Series(time=time, value=value, time_unit='years') new_ts = ts.convert_time_unit(time_unit='yrs BP') print('Original timeseries:') print('time unit:', ts.time_unit) - print('time:', ts.time) + print('time:', ts.time[:10]) print() print('Converted timeseries:') print('time unit:', new_ts.time_unit) - print('time:', new_ts.time) + print('time:', new_ts.time[:10]) ''' new_ts = self.copy() + if time_unit is not None: tu = time_unit.lower() if tu.find('ky')>=0 or tu.find('ka')>=0: @@ -221,7 +238,6 @@ def retrograde_time(time, time_datum, time_exponent): time_dir = 'retrograde' time_datum = 1950/1e3 time_exponent = 3 - time_unit_label = 'ky BP' elif tu.find('my')>=0 or tu.find('ma')>=0: time_dir = 'retrograde' time_datum = 1950/1e6 @@ -230,10 +246,12 @@ def retrograde_time(time, time_datum, time_exponent): time_dir ='retrograde' time_datum = 1950 time_exponent = 0 - else: + elif tu.find('yr')>=0 or tu.find('year')>=0 or tu.find('yrs')>=0 or tu.find('years')>=0: time_dir ='prograde' time_datum = 0 time_exponent = 0 + else: + raise ValueError(f"Current Series time_unit={self.time_unit} is not supported. 
Supported time units are: 'year', 'years', 'yr', 'yrs', 'y BP', 'yr BP', 'yrs BP', 'year BP', 'years BP', 'ky BP', 'kyr BP', 'kyrs BP', 'ka BP', 'my BP', 'myr BP', 'myrs BP', 'ma BP'.") new_time = convert_func[time_dir](self.time, time_datum, time_exponent) else: @@ -275,6 +293,9 @@ def convert_to_ma(): new_ts.value = new_value new_ts.time_unit = time_unit + if keep_log == True: + new_ts.log += ({len(new_ts.log):'convert_time_unit', 'time_unit': time_unit},) + return new_ts def make_labels(self): @@ -368,7 +389,63 @@ def stats(self): 'std':std, 'IQR': IQR} return res + + def flip(self, axis='value', keep_log = False): + ''' + Flips the Series along one or both axes + Parameters + ---------- + axis : str, optional + The axis along which the Series will be flipped. The default is 'value'. + Other acceptable options are 'time' or 'both'. + TODO: enable time flipping after paleopandas is released + + keep_log : Boolean + if True, adds this transformation to the series log. + + Returns + ------- + new : Series + The flipped series object + + Examples + -------- + + .. 
ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + import pandas as pd + data = pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/soi_data.csv',skiprows=0,header=1) + time = data.iloc[:,1] + value = data.iloc[:,2] + ts = pyleo.Series(time=time,value=value,time_name='Year C.E', value_name='SOI', label='SOI') + tsf = ts.flip(keep_log=True) + @savefig ts_flipped.png + fig, ax = tsf.plot() + tsf.log + pyleo.closefig(fig) + ''' + if self.log is not None: + methods = [self.log[idx][idx] for idx in range(len(self.log))] + if 'flip' in methods: + warnings.warn("this Series' log indicates that it has previously been flipped") + + new = self.copy() + + if axis == 'value': + new.value = - self.value + new.value_name = new.value_name + ' x (-1)' + else: + print('Flipping is only enabled along the value axis for now') + + if keep_log == True: + new.log += ({len(new.log): 'flip', 'applied': True, 'axis': axis},) + + return new + def plot(self, figsize=[10, 4], marker=None, markersize=None, color=None, linestyle=None, linewidth=None, xlim=None, ylim=None, @@ -422,7 +499,7 @@ def plot(self, figsize=[10, 4], invert_xaxis : bool, optional if True, the x-axis of the plot will be inverted - + invert_yaxis : bool, optional same for the y-axis @@ -506,7 +583,7 @@ def plot(self, figsize=[10, 4], :okwarning: :okexcept: - fig, ax = ts.plot(color='k', savefig_settings={'path': 'ts_plot3.png'}) + fig, ax = ts.plot(color='k', savefig_settings={'path': 'ts_plot3.png'}); pyleo.closefig(fig) pyleo.savefig(fig,path='ts_plot3.png') ''' # generate default axis labels @@ -558,6 +635,143 @@ def plot(self, figsize=[10, 4], return res + def stripes(self, ref_period, LIM = 2.8, thickness=1.0, figsize=[8, 1], xlim=None, + top_label=None, bottom_label=None, label_color = 'gray', label_size = None, + xlabel=None, savefig_settings=None, ax=None, invert_xaxis=False, + show_xaxis=False, x_offset = 0.05): + '''Represents the Series as an 
Ed Hawkins "stripes" pattern + + Credit: https://matplotlib.org/matplotblog/posts/warming-stripes/ + + Parameters + ---------- + ref_period : array-like (2-elements) + dates of the reference period, in the form "(first, last)" + + thickness : float, optional + vertical thickness of the stripe . The default is 1.0 + + LIM : float + scaling factor for color saturation. default is 2.8 + + figsize : list + a list of two integers indicating the figure size (in inches) + + xlim : list + time axis limits + + top_label : str + the "title" label for the stripe + + bottom_label : str + the "ylabel" explaining which variable is being plotted + + invert_xaxis : bool, optional + if True, the x-axis of the plot will be inverted + + x_offset : float + value controlling the horizontal offset between stripes and labels (default = 0.05) + + show_xaxis : bool + flag indicating whether or not the x-axis should be shown (default = False) + + savefig_settings : dict + the dictionary of arguments for plt.savefig(); some notes below: + - "path" must be specified; it can be any existed or non-existed path, + with or without a suffix; if the suffix is not given in "path", it will follow "format" + - "format" can be one of {"pdf", "eps", "png", "ps"} + + ax : matplotlib.axis, optional + the axis object from matplotlib + See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. + + Returns + ------- + + fig : matplotlib.figure + the figure object from matplotlib + See [matplotlib.pyplot.figure](https://matplotlib.org/stable/api/figure_api.html) for details. + + ax : matplotlib.axis + the axis object from matplotlib + See [matplotlib.axes](https://matplotlib.org/stable/api/axes_api.html) for details. + + Notes + ----- + + When `ax` is passed, the return will be `ax` only; otherwise, both `fig` and `ax` will be returned. 
+ + See also + -------- + + pyleoclim.utils.plotting.stripes : stripes representation of a timeseries + + pyleoclim.utils.plotting.savefig : saving a figure in Pyleoclim + + Examples + -------- + + Plot the HadCRUT5 Global Mean Surface Temperature + + .. ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + import pandas as pd + url = 'https://www.metoffice.gov.uk/hadobs/hadcrut5/data/current/analysis/diagnostics/HadCRUT.5.0.1.0.analysis.summary_series.global.annual.csv' + df = pd.read_csv(url) + time = df['Time'] + gmst = df['Anomaly (deg C)'] + ts = pyleo.Series(time=time,value=gmst, label = 'HadCRUT5', time_name='Year C.E', value_name='GMST') + @savefig hadCRUT5_stripes.png + fig, ax = ts.stripes(ref_period=(1971,2000)) + pyleo.closefig(fig) + + If you wanted to show the time axis: + + .. ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + import pandas as pd + url = 'https://www.metoffice.gov.uk/hadobs/hadcrut5/data/current/analysis/diagnostics/HadCRUT.5.0.1.0.analysis.summary_series.global.annual.csv' + df = pd.read_csv(url) + time = df['Time'] + gmst = df['Anomaly (deg C)'] + ts = pyleo.Series(time=time,value=gmst, label = 'HadCRUT5', time_name='Year C.E', value_name='GMST') + @savefig hadCRUT5_stripes2.png + fig, ax = ts.stripes(ref_period=(1971,2000), show_xaxis=True, figsize=[8, 1.2]) + pyleo.closefig(fig) + + Note that we had to increase the figure height to make space for the extra text. + ''' + + if top_label is None: + top_label = self.label + + if bottom_label is None: + bottom_label = self.value_name + + idx0 = (np.abs(self.time - ref_period[0])).argmin() + idx1 = (np.abs(self.time - ref_period[1])).argmin() + + LIMs = self.value.std()*LIM + # Ed Hawkins says: Currently I use HadCRUT5 with a 1971-2000 baseline + # and a colour scaling of +/- 0.75K (which is probably similar to LIM). 
+ # It should be relatively simple to duplicate the stripes exactly + + res = plotting.stripes_xy( + x=self.time, y=self.value, ref_period=(idx0,idx1), LIM = LIMs, thickness = thickness, + top_label = top_label, bottom_label = bottom_label, label_color = label_color, + figsize=figsize, ax=ax, xlim=xlim, invert_xaxis=invert_xaxis, label_size=label_size, + savefig_settings=savefig_settings, show_xaxis=show_xaxis, x_offset = x_offset, + ) + + return res + + def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): ''' Singular Spectrum Analysis @@ -588,19 +802,19 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): ------- res : object of the SsaRes class containing: - - eigvals : (M, ) array of eigenvalues + eigvals : (M, ) array of eigenvalues - - eigvecs : (M, M) Matrix of temporal eigenvectors (T-EOFs) + eigvecs : (M, M) Matrix of temporal eigenvectors (T-EOFs) - - PC : (N - M + 1, M) array of principal components (T-PCs) + PC : (N - M + 1, M) array of principal components (T-PCs) - - RCmat : (N, M) array of reconstructed components + RCmat : (N, M) array of reconstructed components - - RCseries : (N,) reconstructed series, with mean and variance restored + RCseries : (N,) reconstructed series, with mean and variance restored - - pctvar: (M, ) array of the fraction of variance (%) associated with each mode + pctvar: (M, ) array of the fraction of variance (%) associated with each mode - - eigvals_q : (M, 2) array contaitning the 5% and 95% quantiles of the Monte-Carlo eigenvalue spectrum [ if nMC >0 ] + eigvals_q : (M, 2) array contaitning the 5% and 95% quantiles of the Monte-Carlo eigenvalue spectrum [ if nMC >0 ] References ---------- @@ -621,11 +835,11 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): See also -------- - + pyleoclim.core.utils.decomposition.ssa : Singular Spectrum Analysis utility - + pyleoclim.core.ssares.SsaRes.modeplot : plot SSA modes - + pyleoclim.core.ssares.SsaRes.screeplot : plot SSA 
eigenvalue spectrum Examples @@ -643,11 +857,9 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): time = data.iloc[:,1] value = data.iloc[:,2] ts = pyleo.Series(time=time, value=value, time_name='Year C.E', value_name='SOI', label='SOI') - # plot @savefig ts_plot4.png fig, ax = ts.plot() - - # SSA + pyleo.closefig(fig) nino_ssa = ts.ssa(M=60) Let us now see how to make use of all these arrays. The first step is too inspect the eigenvalue spectrum ("scree plot") to identify remarkable modes. Let us restrict ourselves to the first 40, so we can see something: @@ -656,11 +868,11 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): :okwarning: :okexcept: - # plot eigenvalues @savefig ts_eigen.png - nino_ssa.screeplot() - + fig, ax = nino_ssa.screeplot() + pyleo.closefig(fig) + This highlights a few common phenomena with SSA: * the eigenvalues are in descending order * their uncertainties are proportional to the eigenvalues themselves @@ -686,8 +898,9 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): fig, ax = ts.plot(title='SOI') ax.plot(time,RCk,label='SSA reconstruction, 14 modes',color='orange') ax.legend() + pyleo.closefig(fig) + - Indeed, these first few modes capture the vast majority of the low-frequency behavior, including all the El Niño/La Niña events. What is left (the blue wiggles not captured in the orange curve) are high-frequency oscillations that might be considered "noise" from the standpoint of ENSO dynamics. This illustrates how SSA might be used for filtering a timeseries. One must be careful however: * there was not much rhyme or reason for picking 14 modes. Why not 5, or 39? All we have seen so far is that they gather >95% of the variance, which is by no means a magic number. * there is no guarantee that the first few modes will filter out high-frequency behavior, or at what frequency cutoff they will do so. 
If you need to cut out specific frequencies, you are better off doing it with a classical filter, like the butterworth filter implemented in Pyleoclim. However, in many instances the choice of a cutoff frequency is itself rather arbitrary. In such cases, SSA provides a principled alternative for generating a version of a timeseries that preserves features and excludes others (i.e, a filter). @@ -695,7 +908,7 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): Monte-Carlo SSA - Selecting meaningful modes in eigenproblems (e.g. EOF analysis) is more art than science. However, one technique stands out: Monte Carlo SSA, introduced by Allen & Smith, (1996) to identiy SSA modes that rise above what one would expect from "red noise", specifically an AR(1) process_process). To run it, simply provide the parameter MC, ideally with a number of iterations sufficient to get decent statistics. Here's let's use MC = 1000. The result will be stored in the eigval_q array, which has the same length as eigval, and its two columns contain the 5% and 95% quantiles of the ensemble of MC-SSA eigenvalues. + Selecting meaningful modes in eigenproblems (e.g. EOF analysis) is more art than science. However, one technique stands out: Monte Carlo SSA, introduced by Allen & Smith, (1996) to identify SSA modes that rise above what one would expect from "red noise", specifically an AR(1) process). To run it, simply provide the parameter MC, ideally with a number of iterations sufficient to get decent statistics. Here let's use MC = 1000. The result will be stored in the eigval_q array, which has the same length as eigval, and its two columns contain the 5% and 95% quantiles of the ensemble of MC-SSA eigenvalues. .. ipython:: python :okwarning: @@ -708,21 +921,23 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): .. 
ipython:: python :okwarning: :okexcept: - + @savefig scree_mc.png - nino_mcssa.screeplot() - + fig, ax = nino_mcssa.screeplot() + pyleo.closefig(fig) + print('Indices of modes retained: '+ str(nino_mcssa.mode_idx)) This suggests that modes 1-5 fall above the red noise benchmark. To inspect mode 1 (index 0), just type: - + .. ipython:: python :okwarning: :okexcept: - + @savefig ssa_mode0plot.png - nino_mcssa.modeplot(mode=0) - + fig, ax = nino_mcssa.modeplot(index=0) + pyleo.closefig(fig) + ''' res = decomposition.ssa(self.value, M=M, nMC=nMC, f=f, trunc = trunc, var_thresh=var_thresh) @@ -738,7 +953,7 @@ def ssa(self, M=None, nMC=0, f=0.3, trunc = None, var_thresh=80): def is_evenly_spaced(self, tol=1e-3): '''Check if the Series time axis is evenly-spaced, within tolerance - + Parameters ---------- tol : float @@ -749,13 +964,13 @@ def is_evenly_spaced(self, tol=1e-3): ------- res : bool - + ''' res = tsbase.is_evenly_spaced(self.time, tol) return res - def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kwargs): + def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', keep_log= False, **kwargs): ''' Filtering methods for Series objects using four possible methods: - `Butterworth `_ - `Lanczos `_ @@ -788,6 +1003,9 @@ def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kw If a float, it is interpreted as a low-frequency (high-scale) cutoff (lowpass). If a list, it is interpreted as a frequency band (f1, f2), with f1 < f2 (bandpass). + keep_log : Boolean + if True, adds this step and its parameters to the series log. 
+ kwargs : dict a dictionary of the keyword arguments for the filtering method, see `pyleoclim.utils.filter.savitzky_golay`, `pyleoclim.utils.filter.butterworth`, `pyleoclim.utils.filter.lanczos` and `pyleoclim.utils.filter.firwin` for the details @@ -795,17 +1013,17 @@ def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kw Returns ------- - new : pyleoclim.Series + new : Series See also -------- pyleoclim.utils.filter.butterworth : Butterworth method - + pyleoclim.utils.filter.savitzky_golay : Savitzky-Golay method - + pyleoclim.utils.filter.firwin : FIR filter design using the window method - + pyleoclim.utils.filter.lanczos : lowpass filter via Lanczos resampling @@ -868,7 +1086,7 @@ def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kw :okwarning: :okexcept: - + fig, ax = ts.plot(label='mix') ts.filter(cutoff_freq=[15, 25], method='firwin', window='hanning').plot(ax=ax, label='After 15-25 Hz band-pass filter') @savefig ts_filter4.png @@ -947,9 +1165,11 @@ def filter(self, cutoff_freq=None, cutoff_scale=None, method='butterworth', **kw new_val = method_func[method](y, **args[method]) new.value = new_val + mu # restore the mean + if keep_log == True: + new.log += ({len(new.log): 'filter','method': method, 'args': kwargs, 'fs': fs, 'cutoff_freq': cutoff_freq},) return new - def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, + def histplot(self, figsize=[10, 4], title=None, savefig_settings=None, ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs): ''' Plot the distribution of the timeseries values @@ -1008,8 +1228,8 @@ def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, fig, ax = ts.plot() pyleo.closefig(fig) - @savefig ts_dist.png - fig, ax = ts.distplot() + @savefig ts_hist.png + fig, ax = ts.histplot() pyleo.closefig(fig) ''' @@ -1039,11 +1259,84 @@ def distplot(self, figsize=[10, 4], title=None, savefig_settings=None, else: return ax + # def 
distplot(self, figsize=[10, 4], title=None, savefig_settings=None, + # ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs): + # ''' Plot the distribution of the timeseries values + # [legacy only ; please use histplot() instead] + + # Parameters + # ---------- + + # figsize : list + # a list of two integers indicating the figure size + + # title : str + # the title for the figure + + # savefig_settings : dict + # the dictionary of arguments for plt.savefig(); some notes below: + # - "path" must be specified; it can be any existed or non-existed path, + # with or without a suffix; if the suffix is not given in "path", it will follow "format" + # - "format" can be one of {"pdf", "eps", "png", "ps"} + + # ax : matplotlib.axis, optional + # A matplotlib axis + + # ylabel : str + # Label for the count axis + + # vertical : {True,False} + # Whether to flip the plot vertically + + # edgecolor : matplotlib.color + # The color of the edges of the bar + + # plot_kwargs : dict + # Plotting arguments for seaborn histplot: https://seaborn.pydata.org/generated/seaborn.histplot.html + + # See also + # -------- + + # pyleoclim.utils.plotting.savefig : saving figure in Pyleoclim + + # Examples + # -------- + + # Distribution of the SOI record + + # .. ipython:: python + # :okwarning: + # :okexcept: + + # import pyleoclim as pyleo + # import pandas as pd + # data=pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/soi_data.csv',skiprows=0,header=1) + # time=data.iloc[:,1] + # value=data.iloc[:,2] + # ts=pyleo.Series(time=time,value=value,time_name='Year C.E', value_name='SOI', label='SOI') + + # @savefig ts_plot5.png + # fig, ax = ts.plot() + # pyleo.closefig(fig) + + # @savefig ts_dist.png + # fig, ax = ts.distplot() + # pyleo.closefig(fig) + + # ''' + # warnings.warn( + # "Distplot is deprecated. 
Function has been renamed histplot in order to maintain consistency with seaborn terminology", + # DeprecationWarning, + # stacklevel=2) + + # return self.histplot(figsize, title, savefig_settings, ax, ylabel, vertical, edgecolor, **plot_kwargs) + def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, time_lim=None, value_lim=None, period_lim=None, psd_lim=None, time_label=None, value_label=None, period_label=None, psd_label=None, ts_plot_kwargs = None, wavelet_plot_kwargs = None, - psd_plot_kwargs = None, gridspec_kwargs = None, y_label_loc = None, savefig_settings=None): + psd_plot_kwargs = None, gridspec_kwargs = None, y_label_loc = None, + legend = None, savefig_settings=None): ''' Produce summary plot of timeseries. @@ -1057,7 +1350,7 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, the PSD object of a Series. scalogram : Scalogram - the Scalogram object of a Series. + the Scalogram object of a Series. If the passed scalogram object contains stored signif_scals these will be plotted. figsize : list @@ -1090,14 +1383,17 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, psd_label : str the label for the amplitude axis of PDS + legend : bool + if set to True, a legend will be added to the open space above the psd plot + ts_plot_kwargs : dict - arguments to be passed to the timeseries subplot, see pyleoclim.Series.plot for details + arguments to be passed to the timeseries subplot, see Series.plot for details wavelet_plot_kwargs : dict arguments to be passed to the scalogram plot, see pyleoclim.Scalogram.plot for details psd_plot_kwargs : dict - arguments to be passed to the psd plot, see pyleoclim.PSD.plot for details + arguments to be passed to the psd plot, see PSD.plot for details Certain psd plot settings are required by summary plot formatting. 
These include: - ylabel - legend @@ -1105,22 +1401,20 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, These will be overriden by summary plot to prevent formatting errors gridspec_kwargs : dict - arguments to be passed to the gridspec configuration + arguments used to build the specifications for gridspec configuration The plot is constructed with six slots: - slot [0] contains a subgridspec containing the timeseries and scalogram (shared x axis) - - slot [1] contains a subgridspec containing an empty slot and the PSD plot (shared y axis with - scalogram) + - slot [1] contains a subgridspec containing an empty slot and the PSD plot (shared y axis with scalogram) - slot [2] and slot [3] are empty to allow ample room for xlabels for the scalogram and PSD plots - slot [4] contains the scalogram color bar - slot [5] is empty - + It is possible to tune the size and spacing of the various slots - 'width_ratios': list of two values describing the relative widths of the two columns (default: [6, 1]) - - 'height_ratios': list of three values describing the relative heights of the three rows (default: [8, 1, - .35]) - - 'hspace': vertical space between gridspec slots (default: 0, however if either the scalogram xlabel or - the PSD xlabel contain '\n', .05) - - 'wspace': lateral space between gridspec slots (default: 0.1) + - 'height_ratios': list of three values describing the relative heights of the three rows (default: [2, 7, .35]) + - 'hspace': vertical space between timeseries and scalogram (default: 0, however if either the scalogram xlabel or the PSD xlabel contain '\n', .05) + - 'wspace': lateral space between scalogram and psd plot slots (default: 0.05) + - 'cbspace': vertical space between the scalogram and colorbar y_label_loc : float Plot parameter to adjust horizontal location of y labels to avoid conflict with axis labels, default value is -0.15 @@ -1155,16 +1449,16 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, 
import pyleoclim as pyleo import pandas as pd - + ts=pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/master/example_data/soi_data.csv',skiprows = 1) series = pyleo.Series(time = ts['Year'],value = ts['Value'], time_name = 'Years', time_unit = 'AD') psd = series.spectral(freq_method = 'welch') scalogram = series.wavelet(freq_method = 'welch') - + @savefig ts_summary_plot1.png fig, ax = series.summary_plot(psd = psd,scalogram = scalogram) pyleo.closefig(fig) - + Summary_plot with pre-generated psd and scalogram objects from before and some plot modification arguments passed. Note that if the scalogram contains saved noise realizations these will be flexibly reused. See pyleo.Scalogram.signif_test() for details @@ -1174,12 +1468,12 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, import pyleoclim as pyleo import pandas as pd - + ts=pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/master/example_data/soi_data.csv',skiprows = 1) series = pyleo.Series(time = ts['Year'],value = ts['Value'], time_name = 'Years', time_unit = 'AD') psd = series.spectral(freq_method = 'welch') scalogram = series.wavelet(freq_method = 'welch') - + @savefig ts_summary_plot2.png fig, ax = series.summary_plot(psd = psd,scalogram = scalogram, period_lim = [5,0], ts_plot_kwargs = {'color':'red','linewidth':.5}, psd_plot_kwargs = {'color':'red','linewidth':.5}) pyleo.closefig(fig) @@ -1195,17 +1489,37 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, # spacing if (type(psd_label) == str and '\n' in psd_label) or (psd_label is None): gridspec_kwargs_default = {'width_ratios': [6, 1], - 'height_ratios': [8, 1, .35], - 'hspace': 0.05, 'wspace': 0} + # 'height_ratios': [8, 1, .35], + 'height_ratios': [2,7,.35], + 'hspace': 0.05, 'wspace': 0.05, + 'cbspace':1} else: gridspec_kwargs_default = {'width_ratios': [6, 1], - 'height_ratios': [8, 1, .35], - 'hspace': 0, 'wspace': 0} + # 'height_ratios': [8, 1, .35], + 
'height_ratios': [2,7,.35], + 'hspace': 0, 'wspace': 0, + 'cbspace':1} + for key in gridspec_kwargs_default: if key not in gridspec_kwargs.keys(): gridspec_kwargs[key] = gridspec_kwargs_default[key] + ts_height = gridspec_kwargs['height_ratios'][0] + scal_height = gridspec_kwargs['height_ratios'][1] + cb_height = gridspec_kwargs['height_ratios'][2] + + psd_width = gridspec_kwargs['width_ratios'][1] + scal_width = gridspec_kwargs['width_ratios'][0] + + if 'cbspace' in gridspec_kwargs.keys(): + cb_space = gridspec_kwargs['cbspace'] + else: + cb_space = 1 + + gridspec_kwargs['height_ratios'] = [ts_height+scal_height, cb_space, cb_height] + del gridspec_kwargs['cbspace'] + fig = plt.figure(constrained_layout=False, figsize=figsize) gs = fig.add_gridspec(3, 2, **gridspec_kwargs) @@ -1217,14 +1531,15 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, # hspace=0, wspace=0.1) # Subgridspecs - + #Let's use the same hspace/wspace if given to a user - + gs_d = {} - gs_d['ts_scal'] = gs[0].subgridspec(2, 1, height_ratios=[1, 4], hspace=gridspec_kwargs['hspace']) - gs_d['psd'] = gs[1].subgridspec(2, 1, height_ratios=[1, 4], hspace=gridspec_kwargs['hspace']) - #gs_d['ts_scal'] = gs[0].subgridspec(2, 1, height_ratios=[1, 4], hspace=.10) - #gs_d['psd'] = gs[1].subgridspec(2, 1, height_ratios=[1, 4], hspace=.10) + gs_d['ts_scal'] = gs[0].subgridspec(2, 1, height_ratios=[ts_height, scal_height], hspace=gridspec_kwargs['hspace']) + gs_d['psd'] = gs[1].subgridspec(2, 1, height_ratios=[ts_height, scal_height], hspace=gridspec_kwargs['hspace']) + + # gs_d['ts_scal'] = gs[0].subgridspec(2, 1, height_ratios=[1, 4], hspace=gridspec_kwargs['hspace']) + # gs_d['psd'] = gs[1].subgridspec(2, 1, height_ratios=[1, 4], hspace=gridspec_kwargs['hspace']) gs_d['cb'] = gs[4].subgridspec(1, 1) ax = {} @@ -1292,31 +1607,26 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, else: orient = 'horizontal' # I think padding is now the hspace - if 'pad' in 
wavelet_plot_kwargs['cbar_style']: - pad = wavelet_plot_kwargs['cbar_style']['pad'] - else: - pad = 0.12 + # if 'pad' in wavelet_plot_kwargs['cbar_style']: + # pad = wavelet_plot_kwargs['cbar_style']['pad'] + # else: + # pad = 0.12 if 'label' in wavelet_plot_kwargs['cbar_style']: label = wavelet_plot_kwargs['cbar_style']['label'] else: label = wavelet_plot_kwargs['variable'].capitalize() + ' from ' + scalogram.wave_method - wavelet_plot_kwargs.update({'cbar_style': {'orientation': orient, 'pad': pad, - 'label': label}}) + wavelet_plot_kwargs.update({'cbar_style': {'orientation': orient, + 'label': label, + # 'pad': pad, + }}) - # Moving the colorbar to its own axis without leaving white space - wavelet_plot_kwargs['cbar_style']['inset'] = True wavelet_plot_kwargs['cbar_style']['drawedges'] = True - ax['scal'] = scalogram.plot(ax=ax['scal'], **wavelet_plot_kwargs) + # Do not plot colorbar in scalogram + wavelet_plot_kwargs['plot_cb'] = False - # pull colorbar specifications from scalogram plot - #cbar_data = ax['scal'].figure._localaxes.__dict__['_elements'][2][1].__dict__['_colorbar'].__dict__ - - cbar_data = ax['scal'].figure._localaxes[-1]._colorbar - - # remove inset colorbar (moved to its own axis below) - #ax['scal'].figure._localaxes.__dict__['_elements'][2][1].__dict__['_colorbar'].__dict__['ax'].remove() # clear()#remove()#.set_visible(False) - ax['scal'].figure._localaxes[-1].remove() + # Plot scalogram + ax['scal'] = scalogram.plot(ax=ax['scal'], **wavelet_plot_kwargs) if y_label_loc is not None: ax['scal'].get_yaxis().set_label_coords(y_label_loc, 0.5) @@ -1328,7 +1638,6 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, 'Ylim passed to psd plot through exposed argument and key word argument. 
The exposed argument takes precedence and will overwrite relevant key word argument.') if time_label is not None: - # time_label, value_label = self.make_labels() ax['scal'].set_xlabel(time_label) if 'xlabel' in wavelet_plot_kwargs: print( @@ -1344,8 +1653,11 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, ax['scal'].set_title(None) - ax['scal'].tick_params(axis='x', which='major', pad=12) - # fix ts xticks after final edits to scalogram xtick because of the sharedx + xticks = ax['scal'].get_xticks() + midpoints = xticks[:-1] + np.diff(xticks) / 2 + ax['scal'].set_xticks(midpoints[1:-1]) + + ax['scal'].tick_params(axis='x', pad=12) # which='major', if 'ylims' in psd_plot_kwargs: shared_y_lims = psd_plot_kwargs['ylims'] @@ -1426,10 +1738,45 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, ax['psd'].invert_yaxis() ax['psd'].set_ylabel(None) - ax['psd'].tick_params(axis='y', direction='in', labelleft=False) - ax['psd'].legend().remove() + ax['psd'].tick_params(axis='y', direction='in', labelleft=False, pad=12) + + if legend is None: + for key in ['ts', 'psd']: + ax[key].legend().remove() + if legend == True: + leg_h, leg_l = [], [] + for key in ['ts', 'psd']: + ax[key].legend() + _h, _l = ax[key].get_legend_handles_labels() + for ip, label in enumerate(_l): + if label not in leg_l: + if len(label.split(' ')) > 1: + if len(label) > 15: + label = label[:15] + label[15:].replace(' ', '\n', 1) + label = label.replace('simulations', 'sims') + if psd_width/scal_width < .25: + label = label.replace('threshold', 'C.L.') + leg_l.append(label) + leg_h.append(_h[ip]) + ax[key].legend().remove() + + ax['leg'] = fig.add_subplot(gs_d['psd'][0, 0]) + ax['leg'].grid(False) + for side in ['top', 'bottom', 'left', 'right']: + ax['leg'].spines[side].set_visible(False) + ax['leg'].set_xticklabels([]) + ax['leg'].set_yticklabels([]) + ax['leg'].tick_params(axis='x', which='both', length=0) + ax['leg'].tick_params(axis='y', which='both', 
length=0) + + x0, y0 = 1,1#0,0#-psd_width*3/4, -ts_height*3/4#, psd_width, ts_height + ax['leg'].legend(leg_h, leg_l, fontsize='small', loc='upper left')#, bbox_to_anchor=(x0, y0))# width, height)) + ax['scal'].invert_yaxis() # not sure where this needs to be + # ax['leg'] = fig.add_subplot(gs_d['psd_leg'][0, 0]) + # ax['leg'].legend(h, l) + # ax['psd'] = plt.subplot(gs[1:4, -3:], sharey=ax['scal']) # ax['psd'] = psd.plot(ax=ax['psd'], transpose=True, ylabel = 'PSD from \n' + str(psd.spec_method), **psd_plot_kwargs) # @@ -1478,7 +1825,14 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, # if 'xlabel' in psd_plot_kwargs: # print('Xlabel passed to psd plot through exposed argument and key word argument. The exposed argument takes precedence and will overwrite relevant key word argument.') + # plot color bar for scalogram using filled contour data ax['cb'] = fig.add_subplot(gs_d['cb'][0, 0]) + cb = mcb.Colorbar(ax=ax['cb'], mappable=scalogram.conf, + orientation=wavelet_plot_kwargs['cbar_style']['orientation'], + label=wavelet_plot_kwargs['cbar_style']['label'])#, + # pad=wavelet_plot_kwargs['cbar_style']['pad']) + + # # cb = mpl.colorbar.ColorbarBase(ax['cb'], orientation='horizontal', # cmap=cbar_data['cmap'], # norm=cbar_data['norm'], # vmax and vmin @@ -1486,14 +1840,14 @@ def summary_plot(self, psd, scalogram, figsize=[8, 10], title=None, # boundaries=cbar_data['boundaries'], # , # label=wavelet_plot_kwargs['cbar_style']['label'], # drawedges=cbar_data['drawedges']) # True) - - cb = mpl.colorbar.Colorbar(ax['cb'], mappable = cbar_data.mappable, - orientation='horizontal', - extend=cbar_data.extend, - boundaries=cbar_data.boundaries, # , - label=wavelet_plot_kwargs['cbar_style']['label'], - drawedges=cbar_data.drawedges) # True) - + + # cb = mpl.colorbar.Colorbar(ax['cb'], mappable = cbar_data.mappable, + # orientation='horizontal', + # extend=cbar_data.extend, + # boundaries=cbar_data.boundaries, # , + # 
label=wavelet_plot_kwargs['cbar_style']['label'], + # drawedges=cbar_data.drawedges) # True) + # # ticks=[0, 3, 6, 9]) if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) @@ -1504,23 +1858,28 @@ def copy(self): Returns ------- - Series : pyleoclim.Series + Series : Series A copy of the Series object ''' return deepcopy(self) - def clean(self, verbose=False): + def clean(self, verbose=False, keep_log = False): ''' Clean up the timeseries by removing NaNs and sort with increasing time points Parameters ---------- + verbose : bool If True, will print warning messages if there is any + keep_log : Boolean + if True, adds this step and its parameters to the series log. + Returns ------- - new : pyleoclim.Series + + new : Series Series object with removed NaNs and sorting ''' @@ -1528,9 +1887,11 @@ def clean(self, verbose=False): v_mod, t_mod = tsbase.clean_ts(self.value, self.time, verbose=verbose) new.time = t_mod new.value = v_mod + if keep_log == True: + new.log += ({len(new.log):'clean', 'verbose': verbose},) return new - def sort(self, verbose=False): + def sort(self, verbose=False, keep_log = False): ''' Ensure timeseries is aligned to a prograde axis. If the time axis is prograde to begin with, no transformation is applied. @@ -1539,9 +1900,12 @@ def sort(self, verbose=False): verbose : bool If True, will print warning messages if there is any + keep_log : Boolean + if True, adds this step and its parameter to the series log. + Returns ------- - new : pyleoclim.Series + new : Series Series object with removed NaNs and sorting ''' @@ -1549,37 +1913,58 @@ def sort(self, verbose=False): v_mod, t_mod = tsbase.sort_ts(self.value, self.time, verbose=verbose) new.time = t_mod new.value = v_mod + + if keep_log == True: + new.log += ({len(new.log):'sort', 'verbose': verbose},) return new - def gaussianize(self): - ''' Gaussianizes the timeseries + def gaussianize(self, keep_log = False): + ''' Gaussianizes the timeseries (i.e. 
maps its values to a standard normal) Returns ------- - new : pyleoclim.Series + new : Series The Gaussianized series object + keep_log : Boolean + if True, adds this transformation to the series log. + + References + ---------- + Emile-Geay, J., and M. Tingley (2016), Inferring climate variability from nonlinear proxies: application to palaeo-enso studies, Climate of the Past, 12 (1), 31–50, doi:10.5194/cp- 12-31-2016. ''' new = self.copy() v_mod = tsutils.gaussianize(self.value) new.value = v_mod + + if keep_log == True: + new.log += ({len(new.log):'gaussianize', 'applied': True},) return new - def standardize(self): + def standardize(self, keep_log = False, scale=1): """Standardizes the series ((i.e. remove its estimated mean and divides by its estimated standard deviation) Returns ------- - new : pyleoclim.Series + new : Series The standardized series object + keep_log : Boolean + if True, adds the previous mean, standard deviation and method parameters to the series log. + """ new = self.copy() - v_mod = tsutils.standardize(self.value)[0] - new.value = v_mod + vs, mu, sig = tsutils.standardize(self.value, scale=scale) + new.value = vs + + if keep_log == True: + method_dict = {len(new.log):'standardize', 'args': scale, + 'previous_mean': mu, 'previous_std': sig} + new.log += (method_dict,) return new - def center(self, timespan=None): + + def center(self, timespan=None, keep_log=False): ''' Centers the series (i.e. renove its estimated mean) Parameters @@ -1588,23 +1973,27 @@ def center(self, timespan=None): The timespan over which the mean must be estimated. In the form [a, b], where a, b are two points along the series' time axis. + keep_log : Boolean + if True, adds the previous mean and method parameters to the series log. 
+ Returns ------- - tsc : pyleoclim.Series + new : Series The centered series object - ts_mean : estimated mean of the original series, in case it needs to be restored later ''' - tsc = self.copy() + new = self.copy() if timespan is not None: ts_mean = np.nanmean(self.slice(timespan).value) vc = self.value - ts_mean else: ts_mean = np.nanmean(self.value) vc = self.value - ts_mean - tsc.value = vc - tsc.mean = ts_mean - return tsc + new.value = vc + + if keep_log == True: + new.log += ({len(new.log): 'center', 'args': timespan, 'previous_mean': ts_mean},) + return new def segment(self, factor=10): """Gap detection @@ -1624,7 +2013,7 @@ def segment(self, factor=10): Returns ------- - res : pyleoclim.MultipleSeries Object or pyleoclim.Series Object + res : MultipleSeries or Series If gaps were detected, returns the segments in a MultipleSeries object, else, returns the original timeseries. @@ -1661,7 +2050,7 @@ def slice(self, timespan): new : Series The sliced Series object. - + Examples -------- @@ -1677,7 +2066,7 @@ def slice(self, timespan): time = data.iloc[:,1] value = data.iloc[:,2] ts = pyleo.Series(time=time, value=value, time_name='Year C.E', value_name='SOI', label='SOI') - + ts_slice = ts.slice([1972, 1998]) print("New time bounds:",ts_slice.time.min(),ts_slice.time.max()) @@ -1696,7 +2085,7 @@ def slice(self, timespan): new.value = self.value[mask] return new - def fill_na(self, timespan=None, dt=1): + def fill_na(self, timespan=None, dt=1, keep_log=False): ''' Fill NaNs into the timespan Parameters @@ -1710,6 +2099,9 @@ def fill_na(self, timespan=None, dt=1): dt : float The time spacing to fill the NaNs; default is 1. + keep_log : Boolean + if True, adds this step and its parameters to the series log. 
+ Returns ------- @@ -1738,10 +2130,13 @@ def fill_na(self, timespan=None, dt=1): new.time = new_time new.value = new_value + if keep_log == True: + new.log += ({len(new.log):'fill_na', 'applied': True, 'dt': dt, 'timespan': timespan},) + return new - def detrend(self, method='emd', **kwargs): + def detrend(self, method='emd', keep_log=False, **kwargs): '''Detrend Series object Parameters @@ -1753,13 +2148,17 @@ def detrend(self, method='emd', **kwargs): * "constant": only the mean of data is subtracted. * "savitzky-golay", y is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y. * "emd" (default): Empirical mode decomposition. The last mode is assumed to be the trend and removed from the series - **kwargs : dict + + keep_log : Boolean + if True, adds the removed trend and method parameters to the series log. + + kwargs : dict Relevant arguments for each of the methods. Returns ------- - new : pyleoclim.Series - Detrended Series object + new : Series + Detrended Series object in "value", with new field "trend" added See also -------- @@ -1799,65 +2198,70 @@ def detrend(self, method='emd', **kwargs): # Place it all in a series object and plot it: ts = pyleo.Series(time=time,value=signal_noise + nonlinear_trend) @savefig random_series.png - fig, ax = ts.plot(title='Timeseries with nonlinear trend') + fig, ax = ts.plot(title='Timeseries with nonlinear trend'); pyleo.closefig(fig) # Detrending with default parameters (using EMD method with 1 mode) ts_emd1 = ts.detrend() ts_emd1.label = 'default detrending (EMD, last mode)' @savefig ts_emd1.png - fig, ax = ts_emd1.plot(title='Detrended with EMD method') - ax.plot(time,signal_noise,label='target signal') - ax.legend() - - + fig, ax = ts_emd1.plot(title='Detrended with EMD method'); ax.plot(time,signal_noise,label='target signal'); ax.legend(); pyleo.closefig(fig) - We see that the default function call results in a "Hockey Stick" at the end, which is undesirable. 
- There is no automated way to do this, but with a little trial and error, we find that removing + We see that the default function call results in a "hockey stick" at the end, which is undesirable. + There is no automated way to fix this, but with a little trial and error, we find that removing the 2 smoothest modes performs reasonably well: - + .. ipython:: python :okwarning: :okexcept: - ts_emd2 = ts.detrend(method='emd', n=2) + ts_emd2 = ts.detrend(method='emd', n=2, keep_log=True) ts_emd2.label = 'EMD detrending, last 2 modes' @savefig ts_emd_n2.png - fig, ax = ts_emd2.plot(title='Detrended with EMD (n=2)') - ax.plot(time,signal_noise,label='target signal') - ax.legend() + fig, ax = ts_emd2.plot(title='Detrended with EMD (n=2)'); ax.plot(time,signal_noise,label='target signal'); ax.legend(); pyleo.closefig(fig) Another option for removing a nonlinear trend is a Savitzky-Golay filter: - + .. ipython:: python :okwarning: :okexcept: - + ts_sg = ts.detrend(method='savitzky-golay') ts_sg.label = 'savitzky-golay detrending, default parameters' @savefig ts_sg.png - fig, ax = ts_sg.plot(title='Detrended with Savitzky-Golay filter') - ax.plot(time,signal_noise,label='target signal') - ax.legend() + fig, ax = ts_sg.plot(title='Detrended with Savitzky-Golay filter'); ax.plot(time,signal_noise,label='target signal'); ax.legend(); pyleo.closefig(fig) As we can see, the result is even worse than with EMD (default). Here it pays to look into the underlying method, which comes from SciPy. It turns out that by default, the Savitzky-Golay filter fits a polynomial to the last "window_length" values of the edges. By default, this value is close to the length of the series. Choosing a value 10x smaller fixes the problem here, though you will have to tinker with that parameter until you get the result you seek. - + .. 
ipython:: python :okwarning: :okexcept: - - ts_sg2 = ts.detrend(method='savitzky-golay',sg_kwargs={'window_length':201}) + + ts_sg2 = ts.detrend(method='savitzky-golay',sg_kwargs={'window_length':201}, keep_log=True) ts_sg2.label = 'savitzky-golay detrending, window_length = 201' @savefig ts_sg2.png - fig, ax = ts_sg2.plot(title='Detrended with Savitzky-Golay filter') - ax.plot(time,signal_noise,label='target signal') - ax.legend() + fig, ax = ts_sg2.plot(title='Detrended with Savitzky-Golay filter'); ax.plot(time,signal_noise,label='target signal'); ax.legend(); pyleo.closefig(fig) + Finally, the method returns the trend that was previous, so it can be added back in if need be. + + .. ipython:: python + :okwarning: + :okexcept: + + trend_ts = pyleo.Series(time = time, value = nonlinear_trend, + value_name= 'trend', label='original trend') + @savefig ts_trend.png + fig, ax = trend_ts.plot(title='Trend recovery'); ax.plot(time,ts_emd2.log[1]['previous_trend'],label=ts_emd2.label); ax.plot(time,ts_sg2.log[1]['previous_trend'], label=ts_sg2.label); ax.legend(); pyleo.closefig(fig) + + Both methods can recover the exponential trend, with some edge effects near the end that could be addressed by judicious padding. 
''' new = self.copy() - v_mod = tsutils.detrend(self.value, x=self.time, method=method, **kwargs) + v_mod, trend = tsutils.detrend(self.value, x=self.time, method=method, **kwargs) new.value = v_mod + + if keep_log == True: + new.log += ({len(new.log): 'detrend','method': method, 'args': kwargs, 'previous_trend': trend},) return new def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, settings=None, label=None, scalogram=None, verbose=False): @@ -1866,7 +2270,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s Parameters ---------- - method : str; + method : str; {'wwz', 'mtm', 'lomb_scargle', 'welch', 'periodogram', 'cwt'} freq_method : str @@ -1890,7 +2294,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s Returns ------- - psd : pyleoclim.PSD + psd : PSD A PSD object See also @@ -1984,6 +2388,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_wwz_signif = psd_wwz.signif_test(number=1) # significance test; for real work, should use number=200 or even larger @savefig spec_wwz.png fig, ax = psd_wwz_signif.plot(title='PSD using WWZ method') + pyleo.closefig(fig) We may take advantage of a pre-calculated scalogram using WWZ to accelerate the spectral analysis (although note that the default parameters for spectral and wavelet analysis using WWZ are different): @@ -1996,6 +2401,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_wwz_fast = ts_std.spectral(method='wwz', scalogram=scal_wwz) @savefig spec_wwz_fast.png fig, ax = psd_wwz_fast.plot(title='PSD using WWZ method w/ pre-calculated scalogram') + pyleo.closefig(fig) - Periodogram @@ -2008,6 +2414,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_perio_signif = psd_perio.signif_test(number=20, method='ar1sim') #in practice, need more AR1 simulations @savefig spec_perio.png fig, ax = 
psd_perio_signif.plot(title='PSD using Periodogram method') + pyleo.closefig(fig) - Welch @@ -2019,6 +2426,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_welch_signif = psd_welch.signif_test(number=20, method='ar1sim') #in practice, need more AR1 simulations @savefig spec_welch.png fig, ax = psd_welch_signif.plot(title='PSD using Welch method') + pyleo.closefig(fig) - MTM @@ -2030,6 +2438,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_mtm_signif = psd_mtm.signif_test(number=20, method='ar1sim') #in practice, need more AR1 simulations @savefig spec_mtm.png fig, ax = psd_mtm_signif.plot(title='PSD using the multitaper method') + pyleo.closefig(fig) By default, MTM uses a half-bandwidth of 4 times the fundamental (Rayleigh) frequency, i.e. NW = 4, which is the most conservative choice. NW runs from 2 to 4 in multiples of 1/2, and can be adjusted like so (note the sharper peaks and higher overall variance, which may not be desirable): @@ -2041,6 +2450,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_mtm2 = ts_interp.spectral(method='mtm', settings={'NW':2}, label='MTM, NW=2') @savefig spec_mtm2.png psd_mtm2.plot(title='PSD using the multi-taper method', ax=ax) + pyleo.closefig(fig) - Continuous Wavelet Transform @@ -2053,6 +2463,7 @@ def spectral(self, method='lomb_scargle', freq_method='log', freq_kwargs=None, s psd_cwt_signif = psd_cwt.signif_test(number=20) @savefig spec_cwt.png fig, ax = psd_cwt_signif.plot(title='PSD using CWT method') + pyleo.closefig(fig) ''' if not verbose: @@ -2134,9 +2545,9 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No ---------- method : str {wwz, cwt} - cwt - the continuous wavelet transform (as per Torrence and Compo [1998]) + cwt - the continuous wavelet transform [1] is appropriate for evenly-spaced series. 
- wwz - the weighted wavelet Z-transform (as per Foster [1996]) + wwz - the weighted wavelet Z-transform [2] is appropriate for unevenly-spaced series. Default is cwt, returning an error if the Series is unevenly-spaced. @@ -2167,7 +2578,7 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No pyleoclim.utils.spectral.make_freq_vector : Functions to create the frequency vector pyleoclim.utils.tsutils.detrend : Detrending function - + pyleoclim.core.series.Series.spectral : spectral analysis tools pyleoclim.core.scalograms.Scalogram : Scalogram object @@ -2177,16 +2588,16 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No References ---------- - Torrence, C. and G. P. Compo, 1998: A Practical Guide to Wavelet Analysis. Bull. Amer. Meteor. Soc., 79, 61-78. + [1] Torrence, C. and G. P. Compo, 1998: A Practical Guide to Wavelet Analysis. Bull. Amer. Meteor. Soc., 79, 61-78. Python routines available at http://paos.colorado.edu/research/wavelets/ - Foster, G., 1996: Wavelets for period analysis of unevenly sampled time series. The Astronomical Journal, 112, 1709. + [2] Foster, G., 1996: Wavelets for period analysis of unevenly sampled time series. The Astronomical Journal, 112, 1709. Examples -------- Wavelet analysis on the evenly-spaced SOI record. The CWT method will be applied by default. - + .. 
ipython:: python :okwarning: :okexcept: @@ -2198,22 +2609,22 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No value = data.iloc[:,2] ts = pyleo.Series(time=time,value=value,time_name='Year C.E', value_name='SOI', label='SOI') - scal1 = ts.wavelet() + scal1 = ts.wavelet() scal_signif = scal1.signif_test(number=20) # for research-grade work, use number=200 or larger @savefig scal_cwt.png - fig, ax = scal_signif.plot() - pyleo.closefig() - + fig, ax = scal_signif.plot() + pyleo.closefig(fig) + If you wanted to invoke the WWZ method instead (here with no significance testing, to lower computational cost): - + .. ipython:: python :okwarning: :okexcept: - - scal2 = ts.wavelet(method='wwz') + + scal2 = ts.wavelet(method='wwz') @savefig scal_wwz.png fig, ax = scal2.plot() - pyleo.closefig() + pyleo.closefig(fig) Notice that the two scalograms have different amplitude, which are relative. Method-specific arguments may be passed via `settings`. For instance, if you wanted to change the default mother wavelet @@ -2222,28 +2633,28 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No .. ipython:: python :okwarning: :okexcept: - - scal3 = ts.wavelet(settings = {'mother':'DOG'}) + + scal3 = ts.wavelet(settings = {'mother':'DOG'}) @savefig scal_dog.png fig, ax = scal3.plot(title='CWT scalogram with DOG mother wavelet') - pyleo.closefig() - + pyleo.closefig(fig) + As for WWZ, note that, for computational efficiency, the time axis is coarse-grained by default to 50 time points, which explains in part the difference with the CWT scalogram. - If you need a custom axis, it (and other method-specific parameters) can also be passed + If you need a custom axis, it (and other method-specific parameters) can also be passed via the `settings` dictionary: - + .. 
ipython:: python :okwarning: :okexcept: - + tau = np.linspace(np.min(ts.time), np.max(ts.time), 60) - scal4 = ts.wavelet(method='wwz', settings={'tau':tau}) + scal4 = ts.wavelet(method='wwz', settings={'tau':tau}) @savefig scal_tau.png fig, ax = scal4.plot(title='WWZ scalogram with finer time axis') - pyleo.closefig() - + pyleo.closefig(fig) + ''' if not verbose: warnings.simplefilter('ignore') @@ -2282,7 +2693,8 @@ def wavelet(self, method='cwt', settings=None, freq_method='log', freq_kwargs=No wwz_Neffs = wave_res.Neffs elif method=='cwt': wwz_Neffs = None - args[method].update({'scale':wave_res.scale,'mother':wave_res.mother,'param':wave_res.param}) + args[method].update({'scale':wave_res.scale,'mother':wave_res.mother,'param':wave_res.param, + 'standardize':wave_res.standardize, 'gaussianize':wave_res.gaussianize}) scal = Scalogram( frequency=wave_res.freq, @@ -2310,7 +2722,7 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, Parameters ---------- - target_series : pyleoclim.Series + target_series : Series A pyleoclim Series object on which to perform the coherence analysis method : str @@ -2343,19 +2755,23 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, ---------- Grinsted, A., Moore, J. C. & Jevrejeva, S. Application of the cross wavelet transform and - wavelet coherence to geophysical time series. Nonlin. Processes Geophys. 11, 561–566 (2004). + wavelet coherence to geophysical time series. Nonlin. Processes Geophys. 11, 561–566 (2004). 
See also -------- pyleoclim.utils.spectral.make_freq_vector : Functions to create the frequency vector - + pyleoclim.utils.tsutils.detrend : Detrending function - + pyleoclim.core.multipleseries.MultipleSeries.common_time : put timeseries on common time axis - + pyleoclim.core.series.Series.wavelet : wavelet analysis + pyleoclim.utils.wavelet.wwz_coherence : coherence using the wwz method + + pyleoclim.utils.wavelet.cwt_coherence : coherence using the cwt method + Examples -------- @@ -2371,14 +2787,14 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, time = data['t'].values air = data['air'].values nino = data['nino'].values - ts_air = pyleo.Series(time=time, value=air, time_name='Year (CE)') - ts_nino = pyleo.Series(time=time, value=nino, time_name='Year (CE)') + ts_air = pyleo.Series(time=time, value=data['air'].values, time_name='Year (CE)', + label='All India Rainfall', value_name='AIR (mm/month)') + ts_nino = pyleo.Series(time=time, value=data['nino'].values, time_name='Year (CE)', + label='NINO3', value_name='NINO3 (K)') coh = ts_air.wavelet_coherence(ts_nino) - @savefig coh.png - fig, ax = coh.plot() - pyleo.closefig() + coh.plot() Note that in this example both timeseries area already on a common, evenly-spaced time axis. If they are not (either because the data are unevenly spaced, @@ -2394,11 +2810,11 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, coh_wwz = ts_air.wavelet_coherence(ts_nino, method = 'wwz') @savefig coh_wwz.png - fig, ax = coh_wwz.plot() - + coh_wwz.plot() + As with wavelet analysis, both CWT and WWZ admit optional arguments through `settings`. Significance is assessed similarly as with PSD or Scalogram objects: - + .. 
ipython:: python :okwarning: :okexcept: @@ -2407,20 +2823,20 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, @savefig cwt_sig.png # by default, the plot function will look for the closest quantile to 0.95, but it is easy to adjust: cwt_sig.plot(signif_thresh = 0.9) - + Another plotting option, `dashboard`, allows to visualize both - timeseries as well as the wavelet transform coherency (WTC), which quantifies where + timeseries as well as the wavelet transform coherency (WTC), which quantifies where two timeseries exhibit similar behavior in time-frequency space, and the cross-wavelet - transform (XWT), which indicates regions of high common power. - + transform (XWT), which indicates regions of high common power. + .. ipython:: python :okwarning: :okexcept: @savefig cwt_sig_dash.png cwt_sig.dashboard() - - Note: this design balances many considerations, and is not easily customizable. + + Note: this design balances many considerations, and is not easily customizable. ''' if not verbose: warnings.simplefilter('ignore') @@ -2489,7 +2905,7 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, ) return coh - + def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, common_time_kwargs=None, seed=None): ''' Estimates the Pearson's correlation and associated significance between two non IID time series @@ -2499,7 +2915,7 @@ def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, c 2) 'isopersistent': AR(1) modeling of x and y. 3) 'isospectral': phase randomization of original inputs. (default) - The T-test is a parametric test, hence computationally cheap but can only be performed in ideal circumstances. + The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances. The others are non-parametric, but their computational requirements scale with the number of simulations. 
The choise of significance test and associated number of Monte-Carlo simulations are passed through the settings parameter. @@ -2507,7 +2923,7 @@ def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, c Parameters ---------- - target_series : pyleoclim.Series + target_series : Series A pyleoclim Series object timespan : tuple @@ -2533,7 +2949,7 @@ def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, c Returns ------- - corr : pyleoclim.ui.Corr + corr : pyleoclim.Corr the result object, containing - r : float @@ -2550,6 +2966,9 @@ def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, c -------- pyleoclim.utils.correlation.corr_sig : Correlation function + + pyleoclim.multipleseries.common_time : Aligning time axes + Examples -------- @@ -2614,22 +3033,28 @@ def correlation(self, target_series, timespan=None, alpha=0.05, settings=None, c return corr - def causality(self, target_series, method='liang', settings=None): - ''' Perform causality analysis with the target timeseries - The timeseries are first sorted in ascending order. + def causality(self, target_series, method='liang', timespan=None, settings=None, common_time_kwargs=None): + ''' Perform causality analysis with the target timeseries. Specifically, whether there is information in the target series that influenced the original series. + If the two series have different time axes, they are first placed on a common timescale (in ascending order). Parameters ---------- - target_series : pyleoclim.Series + target_series : Series A pyleoclim Series object on which to compute causality method : {'liang', 'granger'} The causality method to use. + timespan : tuple + The time interval over which to perform the calculation + settings : dict Parameters associated with the causality methods. Note that each method has different parameters. 
See individual methods for details + common_time_kwargs : dict + Parameters for the method `MultipleSeries.common_time()`. Will use interpolation by default. + Returns ------- @@ -2661,7 +3086,6 @@ def causality(self, target_series, method='liang', settings=None): ts_nino=pyleo.Series(time=t,value=nino) ts_air=pyleo.Series(time=t,value=air) - # plot the two timeseries @savefig ts_nino.png fig, ax = ts_nino.plot(title='NINO3 -- SST Anomalies') pyleo.closefig(fig) @@ -2670,26 +3094,50 @@ def causality(self, target_series, method='liang', settings=None): fig, ax = ts_air.plot(title='Deasonalized All Indian Rainfall Index') pyleo.closefig(fig) - # we use the specific params below in ts_nino.causality() just to make the example less heavier; - # please drop the `settings` for real work - caus_res = ts_nino.causality(ts_air, settings={'nsim': 2, 'signif_test': 'isopersist'}) - print(caus_res) + We use the specific params below to lighten computations; you may drop `settings` for real work + + .. ipython:: python + :okwarning: + :okexcept: + + liang_N2A = ts_air.causality(ts_nino, settings={'nsim': 20, 'signif_test': 'isopersist'}) + print(liang_N2A) + liang_A2N = ts_nino.causality(ts_air, settings={'nsim': 20, 'signif_test': 'isopersist'}) + print(liang_A2N) + + liang_N2A['T21']/liang_A2N['T21'] + + Both information flows (T21) are positive, but the flow from NINO3 to AIR is about 3x as large as the other way around, suggesting that NINO3 influences AIR much more than the other way around, which conforms to physical intuition. - Granger causality + To implement Granger causality, simply specfiy the method: .. ipython:: python :okwarning: :okexcept: - caus_res = ts_nino.causality(ts_air, method='granger') - print(caus_res) + granger_A2N = ts_nino.causality(ts_air, method='granger') + granger_N2A = ts_air.causality(ts_nino, method='granger') + + Note that the output is fundamentally different for the two methods. 
Granger causality cannot discriminate between NINO3 -> AIR or AIR -> NINO3, in this case. This is not unusual, and one reason why it is no longer in wide use. ''' - # Sort both timeseries + # Put on common axis if necessary + + ms = MultipleSeries([self, target_series]) + if list(self.time) != list(target_series.time): + common_time_kwargs = {} if common_time_kwargs is None else common_time_kwargs.copy() + ct_args = {'method': 'interp'} + ct_args.update(common_time_kwargs) + ms = ms.common_time(**ct_args) + + if timespan is None: + value1 = ms.series_list[0].value + value2 = ms.series_list[1].value + else: + value1 = ms.series_list[0].slice(timespan).value + value2 = ms.series_list[1].slice(timespan).value - sorted_self = self.sort(verbose=True) - sorted_target = target_series.sort(verbose=True) settings = {} if settings is None else settings.copy() spec_func={ @@ -2699,7 +3147,8 @@ def causality(self, target_series, method='liang', settings=None): args['liang'] = {} args['granger'] = {} args[method].update(settings) - causal_res = spec_func[method](sorted_self.value, sorted_target.value, **args[method]) + + causal_res = spec_func[method](value1, value2, **args[method]) return causal_res def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings=None): @@ -2715,7 +3164,7 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings The number of surrogates to generate length : int - Lenght of the series + Length of the series seed : int Control seed option for reproducibility @@ -2725,13 +3174,13 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings Returns ------- - surr : pyleoclim SurrogateSeries + surr : SurrogateSeries See also -------- pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator - + ''' settings = {} if settings is None else settings.copy() surrogate_func = { @@ -2757,9 +3206,9 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings return surr - def 
outliers(self,method='kmeans',remove=True, settings=None, + def outliers(self,method='kmeans',remove=True, settings=None, fig_outliers=True, figsize_outliers=[10,4], plotoutliers_kwargs=None, savefigoutliers_settings=None, - fig_clusters=True,figsize_clusters=[10,4], plotclusters_kwargs=None,savefigclusters_settings=None): + fig_clusters=True,figsize_clusters=[10,4], plotclusters_kwargs=None,savefigclusters_settings=None, keep_log=False): """ Remove outliers from timeseries data @@ -2788,24 +3237,32 @@ def outliers(self,method='kmeans',remove=True, settings=None, The dimensions of the cluster figures. The default is [10,4]. plotclusters_kwargs : dict, optional Arguments for the cluster plot. The default is None. - savefigclusters_settings : TYPE, optional + savefigclusters_settings : dict, optional Saving options for the cluster plot. The default is None. - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} + keep_log : Boolean + if True, adds the previous method parameters to the series log. Returns ------- - ts: pyleoclim.Series + ts: Series A new Series object witthout outliers if remove is True. Otherwise, returns the original timeseries - - res: pandas.DataFrame - Contains relevant diagnostic metrics for the clustering algorithms. 
- """ + + See also + -------- + + pyleoclim.utils.tsutils.detect_outliers_DBSCAN : Outlier detection using the DBSCAN method + + pyleoclim.utils.tsutils.detect_outliers_kmeans : Outlier detection using the kmeans method + + pyleoclim.utils.tsutils.remove_outliers : Remove outliers from the series + """ if method not in ['kmeans','DBSCAN']: raise ValueError('method should either be "kmeans" or "DBSCAN"') - + # run the algorithm settings = {} if settings is None else settings.copy() spec_func={ @@ -2815,150 +3272,169 @@ def outliers(self,method='kmeans',remove=True, settings=None, args['kmeans'] = {} args['DBSCAN'] = {} args[method].update(settings) - + indices, res = spec_func[method](self.value,**args[method]) - + # Create the new Series object - new=self.copy() + new=self.copy() if remove==True: if len(indices)>=1: - ts,ys=tsutils.remove_outliers(self.time,self.value,indices) + ys,ts=tsutils.remove_outliers(self.time,self.value,indices) new.value=ys new.time=ts - + # Figures # Optional parameters savefigoutliers_settings = {} if savefigoutliers_settings is None else savefigoutliers_settings.copy() savefigclusters_settings = {} if savefigclusters_settings is None else savefigclusters_settings.copy() plotoutliers_kwargs = {} if plotoutliers_kwargs is None else plotoutliers_kwargs.copy() plotclusters_kwargs = {} if plotclusters_kwargs is None else plotclusters_kwargs.copy() - + # Figure showing the outliers - + if fig_outliers == True: fig,ax = plt.subplots(figsize=figsize_outliers) time_label, value_label = self.make_labels() - + if 'xlabel' not in plotoutliers_kwargs.keys(): xlabel = time_label else: xlabel = plotoutliers_kwargs['xlabel'] plotoutliers_kwargs.pop('xlabel') - + if 'ylabel' not in plotoutliers_kwargs.keys(): ylabel = value_label else: ylabel = plotoutliers_kwargs['ylabel'] plotoutliers_kwargs.pop('ylabel') - + if 'title' not in plotoutliers_kwargs.keys(): title = None else: title = plotoutliers_kwargs['title'] plotoutliers_kwargs.pop('title') - + if 
'xlim' not in plotoutliers_kwargs.keys(): xlim = None else: xlim = plotoutliers_kwargs['xlim'] plotoutliers_kwargs.pop('xlim') - + if 'ylim' not in plotoutliers_kwargs.keys(): ylim = None else: ylim = plotoutliers_kwargs['ylim'] plotoutliers_kwargs.pop('ylim') - + if 'legend' not in plotoutliers_kwargs.keys(): legend = True else: legend = plotoutliers_kwargs['legend'] plotoutliers_kwargs.pop('legend') - + if len(indices)>=1: plotting.plot_scatter_xy(self.time,self.value,self.time[indices],self.value[indices], xlabel=xlabel,ylabel=ylabel, - title = title, xlim=xlim, ylim=ylim, legend=legend, + title = title, xlim=xlim, ylim=ylim, legend=legend, plot_kwargs=plotoutliers_kwargs,ax=ax) - + else: plotting.plot_xy(self.time,self.value, xlabel=xlabel,ylabel=ylabel, - title = title, xlim=xlim, ylim=ylim, legend=legend, + title = title, xlim=xlim, ylim=ylim, legend=legend, plot_kwargs=plotoutliers_kwargs,ax=ax) - + #Saving options if 'path' in savefigoutliers_settings: plotting.savefig(fig,settings=savefigoutliers_settings) - + if fig_clusters == True: fig,ax = plt.subplots(figsize=figsize_clusters) - + # dealt with plot options time_label, value_label = self.make_labels() - + if 'xlabel' not in plotclusters_kwargs.keys(): xlabel = time_label else: xlabel = plotclusters_kwargs['xlabel'] plotclusters_kwargs.pop('xlabel') - + if 'ylabel' not in plotclusters_kwargs.keys(): ylabel = value_label else: ylabel = plotclusters_kwargs['ylabel'] plotclusters_kwargs.pop('ylabel') - + if 'title' not in plotclusters_kwargs.keys(): title = None else: title = plotclusters_kwargs['title'] plotclusters_kwargs.pop('title') - + if 'xlim' not in plotclusters_kwargs.keys(): xlim = None else: xlim = plotclusters_kwargs['xlim'] plotclusters_kwargs.pop('xlim') - + if 'ylim' not in plotclusters_kwargs.keys(): ylim = None else: ylim = plotclusters_kwargs['ylim'] plotclusters_kwargs.pop('ylim') - + if 'legend' not in plotclusters_kwargs.keys(): legend = True else: legend = 
plotclusters_kwargs['legend'] plotclusters_kwargs.pop('legend') - + clusters = np.array(res.loc[res['silhouette score']==np.max(res['silhouette score'])]['clusters'])[0] - + if 'c' not in plotclusters_kwargs.keys(): color_list = list(mcolors.CSS4_COLORS.keys()) color_list.remove('red') random.Random(9).shuffle(color_list) - colors = color_list[0:len(np.unique(clusters))] + colors = color_list[0:len(np.unique(clusters))] vectorizer = np.vectorize(lambda x: colors[x % len(colors)]) c = vectorizer(clusters) else: c = plotclusters_kwargs['c'] plotclusters_kwargs.pop('c') - + plotting.scatter_xy(self.time,self.value,c = c, xlabel=xlabel,ylabel=ylabel, - title = title, xlim=xlim, ylim=ylim, legend=legend, + title = title, xlim=xlim, ylim=ylim, legend=legend, plot_kwargs = plotclusters_kwargs, ax=ax) - - #plot + + #plot if np.size(indices) != 0: plotting.scatter_xy(self.time[indices],self.value[indices],c='red',ax=ax) if 'path' in savefigclusters_settings: plotting.savefig(fig,settings=savefigclusters_settings) - - return new, res - def interp(self, method='linear', **kwargs): + #return the log if asked + if keep_log == True: + if method == 'kmeans': + new.log += ({len(new.log): 'outliers','method': method, + 'args': settings, + 'nbr_clusters':np.array(res['number of clusters']), + 'silhouette_score':np.array(res['silhouette score']), + 'outlier_indices':np.array(res['outlier indices']), + 'clusters':np.array(res['clusters'])},) + elif method == 'DBSCAN': + new.log += ({len(new.log): 'outliers','method': method, + 'args': settings, + 'eps':np.array(res['eps']), + 'min_samples':np.array(res['min_samples']), + 'nbr_clusters':np.array(res['number of clusters']), + 'silhouette_score':np.array(res['silhouette score']), + 'outlier_indices':np.array(res['outlier indices']), + 'clusters':np.array(res['clusters'])},) + + return new + + def interp(self, method='linear', keep_log= False, **kwargs): '''Interpolate a Series object onto a new time axis Parameters @@ -2967,13 +3443,16 
@@ def interp(self, method='linear', **kwargs): method : {‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’} where ‘zero’, ‘slinear’, ‘quadratic’ and ‘cubic’ refer to a spline interpolation of zeroth, first, second or third order; ‘previous’ and ‘next’ simply return the previous or next value of the point) or as an integer specifying the order of the spline interpolator to use. Default is ‘linear’. + keep_log : Boolean + if True, adds the method name and its parameters to the series log. + kwargs : Arguments specific to each interpolation function. See pyleoclim.utils.tsutils.interp for details Returns ------- - new : pyleoclim.Series + new : Series An interpolated Series object See also @@ -2986,13 +3465,16 @@ def interp(self, method='linear', **kwargs): ti, vi = tsutils.interp(self.time,self.value,interp_type=method,**kwargs) new.time = ti new.value = vi + if keep_log == True: + new.log += ({len(new.log):'interp', 'method': method, 'args': kwargs},) + return new - def gkernel(self, step_type='median', **kwargs): + def gkernel(self, step_type='median', keep_log = False, **kwargs): ''' Coarse-grain a Series object via a Gaussian kernel. - Like .bin() this technique is conservative and uses the max space between points - as the default spacing. Unlike .bin(), gkernel() uses a gaussian kernel to + Like .bin() this technique is conservative and uses the max space between points + as the default spacing. Unlike .bin(), gkernel() uses a gaussian kernel to calculate the weighted average of the time series over these intervals. Parameters @@ -3002,6 +3484,9 @@ def gkernel(self, step_type='median', **kwargs): type of timestep: 'mean', 'median', or 'max' of the time increments + keep_log : Boolean + if True, adds the step type and its keyword arguments to the series log. + kwargs : Arguments for kernel function. 
See pyleoclim.utils.tsutils.gkernel for details @@ -3009,7 +3494,7 @@ def gkernel(self, step_type='median', **kwargs): Returns ------- - new : pyleoclim.Series + new : Series The coarse-grained Series object @@ -3024,13 +3509,18 @@ def gkernel(self, step_type='median', **kwargs): ti, vi = tsutils.gkernel(self.time, self.value, **kwargs) # apply kernel new.time = ti new.value = vi + + if keep_log == True: + new.log += ({len(new.log):'gkernel', 'step_type': step_type, 'args': kwargs},) return new - def bin(self,**kwargs): + def bin(self, keep_log = False, **kwargs): '''Bin values in a time series Parameters ---------- + keep_log : Boolean + if True, adds this step and its parameters to the series log. kwargs : Arguments for binning function. See pyleoclim.utils.tsutils.bin for details @@ -3038,17 +3528,19 @@ def bin(self,**kwargs): Returns ------- - new : pyleoclim.Series + new : Series An binned Series object See also -------- - pyleoclim.utils.tsutils.bin : bin the time series into evenly-spaced bins + pyleoclim.utils.tsutils.bin : bin the series values into evenly-spaced time bins ''' new=self.copy() res_dict = tsutils.bin(self.time,self.value,**kwargs) new.time = res_dict['bins'] new.value = res_dict['binned_values'] + if keep_log == True: + new.log += ({len(new.log):'bin', 'args': kwargs},) return new diff --git a/pyleoclim/core/spatialdecomp.py b/pyleoclim/core/spatialdecomp.py index b51ece29..a253cf32 100644 --- a/pyleoclim/core/spatialdecomp.py +++ b/pyleoclim/core/spatialdecomp.py @@ -10,28 +10,35 @@ class SpatialDecomp: ''' Class to hold the results of spatial decompositions applies to : `pca()`, `mcpca()`, `mssa()` - Attributes + Parameters ---------- time: float + the common time axis locs: float (p, 2) + a p x 2 array of coordinates (latitude, longitude) for mapping the spatial patterns ("EOFs") name: str + name of the dataset/analysis to use in plots eigvals: float + vector of eigenvalues from the decomposition eigvecs: float + array of eigenvectors 
from the decomposition pctvar: float + array of pct variance accounted for by each mode neff: float + scalar representing the effective sample size of the leading mode ''' @@ -52,43 +59,53 @@ def screeplot(self, figsize=[6, 4], uq='N82', title='scree plot', ax=None, savef Parameters ---------- + figsize : list, optional + The figure size. The default is [6, 4]. title : str, optional + Plot title. The default is 'scree plot'. savefig_settings : dict + the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} title_kwargs : dict, optional + the keyword arguments for ax.set_title() ax : matplotlib.axis, optional + the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. xlim : list, optional + x-axis limits. The default is [0, 10] (first 10 eigenvalues) uq : str, optional + Method used for uncertainty quantification of the eigenvalues. 'N82' uses the North et al "rule of thumb" [1] with effective sample size computed as in [2]. 'MC' uses Monte-Carlo simulations (e.g. MC-EOF). Returns an error if no ensemble is found. clr_eig : str, optional + color to be used for plotting eigenvalues References ---------- - _[1] North, G. R., T. L. Bell, R. F. Cahalan, and F. J. Moeng (1982), Sampling errors in the estimation of empirical orthogonal functions, Mon. Weather Rev., 110, 699–706. - _[2] Hannachi, A., I. T. Jolliffe, and D. B. Stephenson (2007), Empirical orthogonal functions and related techniques in atmospheric science: A review, International Journal of Climatology, 27(9), 1119–1152, doi:10.1002/joc.1499. + [1]_ North, G. R., T. L. Bell, R. F. Cahalan, and F. J. Moeng (1982), Sampling errors in the estimation of empirical orthogonal functions, Mon. Weather Rev., 110, 699–706. + + [2]_ Hannachi, A., I. T. 
Jolliffe, and D. B. Stephenson (2007), Empirical orthogonal functions and related techniques in atmospheric science: A review, International Journal of Climatology, 27(9), 1119–1152, doi:10.1002/joc.1499. ''' savefig_settings = {} if savefig_settings is None else savefig_settings.copy() @@ -154,27 +171,34 @@ def modeplot(self, index=0, figsize=[10, 5], ax=None, savefig_settings=None, Parameters ---------- + index : int + the (0-based) index of the mode to visualize. Default is 0, corresponding to the first mode. figsize : list, optional + The figure size. The default is [10, 5]. savefig_settings : dict + the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} title_kwargs : dict + the keyword arguments for ax.set_title() gs : matplotlib.gridspec object, optional + the axis object from matplotlib See [matplotlib.gridspec.GridSpec](https://matplotlib.org/stable/tutorials/intermediate/gridspec.html) for details. spec_method: str, optional + The name of the spectral method to be applied on the PC. Default: MTM Note that the data are evenly-spaced, so any spectral method that assumes even spacing is applicable here: 'mtm', 'welch', 'periodogram' diff --git a/pyleoclim/core/ssares.py b/pyleoclim/core/ssares.py index 7f3fbef1..ba919909 100644 --- a/pyleoclim/core/ssares.py +++ b/pyleoclim/core/ssares.py @@ -135,7 +135,7 @@ def screeplot(self, figsize=[6, 4], title='SSA scree plot', ax=None, savefig_set Examples -------- - Plot the SSA eig envalue spectrum of the Southern Oscillation Index: + Plot the SSA eigenvalue spectrum of the Southern Oscillation Index: .. 
ipython:: python :okwarning: @@ -160,13 +160,13 @@ def screeplot(self, figsize=[6, 4], title='SSA scree plot', ax=None, savefig_set dv = v*np.sqrt(2/(n-1)) idx = np.arange(len(v))+1 if self.eigvals_q is not None: - plt.fill_between(idx,self.eigvals_q[:,0],self.eigvals_q[:,1], color=clr_mcssa, alpha = 0.3, label='AR(1) 5-95% quantiles') + ax.fill_between(idx,self.eigvals_q[:,0],self.eigvals_q[:,1], color=clr_mcssa, alpha = 0.3, label='AR(1) 5-95% quantiles') - plt.errorbar(x=idx,y=v,yerr = dv, color=clr_eig,marker='o',ls='',alpha=1.0,label=self.name) - plt.plot(idx[self.mode_idx],v[self.mode_idx],color=clr_signif,marker='o',ls='', + ax.errorbar(x=idx,y=v,yerr = dv, color=clr_eig,marker='o',ls='',alpha=1.0,label=self.name) + ax.plot(idx[self.mode_idx],v[self.mode_idx],color=clr_signif,marker='o',ls='', markersize=4, label='modes retained',zorder=10) - plt.title(title,fontweight='bold'); plt.legend() - plt.xlabel(r'Mode index $i$'); plt.ylabel(r'$\lambda_i$') + ax.set_title(title,fontweight='bold'); ax.legend() + ax.set_xlabel(r'Mode index $i$'); ax.set_ylabel(r'$\lambda_i$') ax.xaxis.set_major_locator(MaxNLocator(integer=True)) # enforce integer values @@ -220,7 +220,7 @@ def modeplot(self, index=0, figsize=[10, 5], savefig_settings=None, pyleoclim.core.series.Series.ssa : Singular Spectrum Analysis for timeseries objects - pyleoclim.core.utils.decomposition.ssa : Singular Spectrum Analysis utility + pyleoclim.utils.decomposition.ssa : Singular Spectrum Analysis utility pyleoclim.core.ssares.SsaRes.screeplot : plot SSA eigenvalue spectrum diff --git a/pyleoclim/tests/test_core_EnsembleSeries.py b/pyleoclim/tests/test_core_EnsembleSeries.py index 3361f6e2..2234d2d1 100644 --- a/pyleoclim/tests/test_core_EnsembleSeries.py +++ b/pyleoclim/tests/test_core_EnsembleSeries.py @@ -193,3 +193,40 @@ def test_plot_traces_t0(self): fig, ax = ts_ens.plot_traces(alpha=0.2, num_traces=8) pyleo.closefig(fig) +class TestUIEnsembleSeriesHistplot(): + def test_histplot_t0(self): + 
'''Test for EnsembleSeries.histplot() + ''' + nn = 30 # number of noise realizations + nt = 500 + series_list = [] + + signal = gen_ts(model='colored_noise', nt=nt, alpha=1.0).standardize() + noise = np.random.randn(nt, nn) + + for idx in range(nn): # noise + ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx]) + series_list.append(ts) + + ts_ens = pyleo.EnsembleSeries(series_list) + + ts_ens.histplot() + +class TestUIEnsembleSeriesDistplot(): + def test_histplot_t0(self): + '''Test for EnsembleSeries.distplot() + ''' + nn = 30 # number of noise realizations + nt = 500 + series_list = [] + + signal = gen_ts(model='colored_noise', nt=nt, alpha=1.0).standardize() + noise = np.random.randn(nt, nn) + + for idx in range(nn): # noise + ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx]) + series_list.append(ts) + + ts_ens = pyleo.EnsembleSeries(series_list) + + ts_ens.histplot() diff --git a/pyleoclim/tests/test_core_MultipleSeries.py b/pyleoclim/tests/test_core_MultipleSeries.py index c0c2a894..1275ce04 100644 --- a/pyleoclim/tests/test_core_MultipleSeries.py +++ b/pyleoclim/tests/test_core_MultipleSeries.py @@ -80,7 +80,12 @@ def test_detrend_t1(self, detrend_method): # Create a multiple series object ts_all= pyleo.MultipleSeries([ts,ts1]) - ts_detrend=ts_all.detrend(method=detrend_method) + ts_detrend=ts_all.detrend(method=detrend_method) + detrend_0 = ts_detrend.series_list[0] + detrend_1 = ts_detrend.series_list[1] + + assert len(detrend_0.value)==len(detrend_0.time) + assert len(detrend_1.value)==len(detrend_1.time) class TestMultipleSeriesPlot: '''Test for MultipleSeries.plot() @@ -121,6 +126,29 @@ def test_plot(self): assert_array_equal(v_0, y_plot_0) assert_array_equal(v_1, y_plot_1) +class TestMultipleSeriesStripes: + '''Test for MultipleSeries.stripes() + + ''' + + def test_stripes(self): + + #Generate time and value arrays + t_0, v_0 = gen_normal() + t_1, v_1 = gen_normal() + + #Create series objects + ts_0 = pyleo.Series(time 
= t_0, value = v_0) + ts_1 = pyleo.Series(time = t_1, value = v_1) + + #Create a list of series objects + serieslist = [ts_0, ts_1] + + #Turn this list into a multiple series object + ts_M = pyleo.MultipleSeries(serieslist) + + fig, ax = ts_M.stripes(LIM=4, x_offset=2, label_color = 'red') + class TestMultipleSeriesStandardize: '''Test for MultipleSeries.standardize() diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index b223673f..1f0fe222 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -39,7 +39,7 @@ def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None): 'wrapper for gen_ts in pyleoclim' - + t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed) ts=pyleo.Series(t,v) return ts @@ -51,7 +51,7 @@ def gen_normal(loc=0, scale=1, nt=100): v = np.random.normal(loc=loc, scale=scale, size=nt) ts = pyleo.Series(t,v) return ts - + # def load_data(): # try: # url = 'https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/scal_signif_benthic.json' @@ -222,12 +222,12 @@ def test_spectral_t6(self, spec_method, eps=0.3): psd = ts2.spectral(method=spec_method) beta = psd.beta_est().beta_est_res['beta'] assert np.abs(beta-alpha) < eps - + @pytest.mark.parametrize('spec_method', ['wwz','cwt']) def test_spectral_t7(self, spec_method,): '''Test the spectral significance testing with pre-generated scalogram objects ''' - + ts = gen_ts() scal = ts.wavelet(method=spec_method) signif = scal.signif_test(number=2,export_scal = True) @@ -251,7 +251,8 @@ def test_bin_t1(self): v_unevenly = np.delete(ts.value, deleted_idx) ts2 = pyleo.Series(time=t_unevenly, value=v_unevenly) - ts2_bin=ts2.bin() + ts2_bin=ts2.bin(keep_log=True) + print(ts2_bin.log[1]) def test_bin_t2(self): ''' Test the bin function by passing arguments''' @@ -291,7 +292,7 @@ def test_stats(self): key = {'mean': 4.5,'median': 4.5,'min': 0.0,'max': 9.0,'std': 
np.std(t),'IQR': 4.5} assert stats == key - + class TestUiSeriesCenter: '''Test for Series.center() @@ -303,9 +304,12 @@ def test_center(self): ts = gen_ts(nt=500, alpha=alpha) #Call function to be tested - tsc = ts.center() + tsc = ts.center(keep_log=True) + print(tsc.log[1]) + + assert np.abs(tsc.mean) <= np.sqrt(sys.float_info.epsilon) - assert np.abs(tsc.mean) <= np.sqrt(sys.float_info.epsilon) + assert tsc.mean == tsc.log[1]['previous_mean'] class TestUiSeriesStandardize: '''Test for Series.standardize() @@ -317,7 +321,7 @@ def test_standardize(self): ts = gen_ts(nt=500, alpha=alpha) #Call function to be tested - ts_std = ts.standardize() + ts_std = ts.standardize(keep_log=True) #Compare maximum and minimum values value = ts.__dict__['value'] @@ -339,7 +343,7 @@ def test_clean(self): ts = gen_normal() #Call function for testing - ts_clean = ts.clean() + ts_clean = ts.clean(keep_log=True) #Isolate time and value components time = ts_clean.__dict__['time'] @@ -354,7 +358,7 @@ class TestUiSeriesGaussianize: Gaussianize normalizes the series object, so we'll simply test maximum and minimum values''' def test_gaussianize(self): ts = gen_ts() - ts_gauss = ts.gaussianize() + ts_gauss = ts.gaussianize(keep_log=True) value = ts.__dict__['value'] value_std = ts_gauss.__dict__['value'] @@ -422,10 +426,10 @@ class TestUiSeriesSummaryPlot: ''' Test Series.summary_plot() ''' def test_summary_plot_t0(self): - '''Testing that labels are being passed and that psd and scalogram objects dont fail when passed. + '''Testing that labels are being passed and that psd and scalogram objects dont fail when passed. Also testing that we can specify fewer significance tests than those stored in the scalogram object Note that we should avoid pyleo.showfig() in tests. - + Passing pre generated scalogram and psd. 
''' ts = gen_ts() @@ -440,7 +444,7 @@ def test_summary_plot_t0(self): period_label=period_label, psd_label=psd_label, value_label=value_label, time_label=time_label ) - + assert ax['scal'].properties()['ylabel'] == period_label, 'Period label is not being passed properly' assert ax['psd'].properties()['xlabel'] == psd_label, 'PSD label is not being passed properly' assert ax['scal'].properties()['xlabel'] == time_label, 'Time label is not being passed properly' @@ -451,16 +455,16 @@ def test_summary_plot_t0(self): def test_summary_plot_t1(self): '''Testing that the bare function works Note that we should avoid pyleo.showfig() in tests. - + Passing just a pre generated psd. ''' ts = gen_ts() scal = ts.wavelet(method='cwt') psd = ts.spectral(method='cwt') fig, ax = ts.summary_plot(psd=psd,scalogram=scal) - - plt.close(fig) - + + plt.close(fig) + class TestUiSeriesCorrelation: ''' Test Series.correlation() @@ -547,8 +551,25 @@ def test_causality_t0(self, method, eps=1): ts1 = pyleo.Series(time=ts.time, value=v1) ts2 = pyleo.Series(time=ts.time, value=v2) - causal_res = ts1.causality(ts2, method=method) - + _ = ts1.causality(ts2, method=method) + + @pytest.mark.parametrize('method', ['liang', 'granger']) + def test_causality_t1(self, method, eps=1): + ''' Generate two series from a same basic series and calculate their correlation + on a specified timespan + Note: NO assert statements for this test yet + ''' + alpha = 1 + nt = 100 + ts = gen_ts(nt=nt,alpha=alpha) + v1 = ts.value + np.random.normal(loc=0, scale=1, size=nt) + v2 = ts.value + np.random.normal(loc=0, scale=2, size=nt) + + ts1 = pyleo.Series(time=ts.time, value=v1) + ts2 = pyleo.Series(time=ts.time, value=v2) + + _ = ts1.causality(ts2, method=method, timespan=(0, 67)) + class TestUISeriesOutliers: ''' Tests for Series.outliers() @@ -570,8 +591,8 @@ def test_outliers_t1(self,remove_outliers): # Get a series object ts2 = pyleo.Series(time = ts.time, value = v_out) # Remove outliers - ts_out, res = 
ts.outliers(remove=remove_outliers) - + ts_out = ts2.outliers(remove=remove_outliers) + @pytest.mark.parametrize('method', ['kmeans','DBSCAN']) def test_outliers_t2(self,method): @@ -588,12 +609,30 @@ def test_outliers_t2(self,method): # Get a series object ts2 = pyleo.Series(time = ts.time, value = v_out) # Remove outliers - ts_out, res = ts.outliers(method=method) + ts_out = ts2.outliers(method=method) + + @pytest.mark.parametrize('keep_log', [True,False]) + def test_outliers_t3(self,keep_log): + + #Generate data + ts = gen_ts() + #Add outliers + outliers_start = np.mean(ts.value)+5*np.std(ts.value) + outliers_end = np.mean(ts.value)+7*np.std(ts.value) + outlier_values = np.arange(outliers_start,outliers_end,0.1) + index = np.random.randint(0,len(ts.value),6) + v_out = ts.value + for i,ind in enumerate(index): + v_out[ind] = outlier_values[i] + # Get a series object + ts2 = pyleo.Series(time = ts.time, value = v_out) + # Remove outliers + ts_out = ts2.outliers(keep_log=keep_log) class TestUISeriesGkernel: ''' Unit tests for the TestUISeriesGkernel function ''' - def test_interp_t1(self): + def test_gkernel_t1(self): ''' Test the gkernel function with default parameter values''' ts = gen_ts(nt=550, alpha=1.0) @@ -604,9 +643,9 @@ def test_interp_t1(self): v_unevenly = np.delete(ts.value, deleted_idx) ts2 = pyleo.Series(time=t_unevenly, value=v_unevenly) - ts_interp=ts2.gkernel() + ts_interp=ts2.gkernel(keep_log=True) - def test_interp_t2(self): + def test_gkernel_t2(self): ''' Test the gkernel function with specified bandwidth''' ts = gen_ts(nt=550, alpha=1.0) @@ -634,7 +673,7 @@ def test_interp_t1(self): v_unevenly = np.delete(ts.value, deleted_idx) ts = pyleo.Series(time=t_unevenly, value=v_unevenly) - ts_interp=ts.interp() + ts_interp=ts.interp(keep_log=True) def test_interp_t2(self): ''' Test the bin function by passing arguments''' @@ -680,7 +719,7 @@ def test_detrend_t1(self,detrend_method): v_trend = ts.value + nonlinear_trend #create a timeseries object ts2 
= pyleo.Series(time=ts.time, value=v_trend) - ts_detrend=ts2.detrend(method=detrend_method) + ts_detrend=ts2.detrend(method=detrend_method, keep_log=True) class TestUISeriesWaveletCoherence(): ''' Test the wavelet coherence @@ -702,7 +741,7 @@ def test_xwave_t1(self): ts2 = gen_ts(model='colored_noise', nt=nt) freq = np.linspace(1/500, 1/2, 20) _ = ts1.wavelet_coherence(ts2,method='wwz',settings={'freq':freq}) - + @pytest.mark.parametrize('mother',['MORLET', 'PAUL', 'DOG']) def test_xwave_t2(self,mother): ''' Test Series.wavelet_coherence() with CWT with mother wavelet specified via `settings` @@ -715,7 +754,7 @@ def test_xwave_t2(self,mother): def test_xwave_t3(self): ''' Test Series.wavelet_coherence() with WWZ on unevenly spaced data ''' - + ts1 = gen_ts(nt=220, alpha=1) ts2 = gen_ts(nt=220, alpha=1) #remove points @@ -749,13 +788,13 @@ def test_wave_t1(self,wave_method): ts = gen_ts(model='colored_noise',nt=n) freq = np.linspace(1/n, 1/2, 20) _ = ts.wavelet(method=wave_method, settings={'freq': freq}) - + def test_wave_t2(self): ''' Test Series.wavelet() ntau option and plot functionality ''' ts = gen_ts(model='colored_noise',nt=200) _ = ts.wavelet(method='wwz',settings={'ntau':10}) - + @pytest.mark.parametrize('mother',['MORLET', 'PAUL', 'DOG']) def test_wave_t3(self,mother): ''' Test Series.wavelet() with different mother wavelets @@ -775,7 +814,7 @@ def test_ssa_t0(self): res = cn.ssa() assert abs(res.pctvar.sum() - 100.0)<0.01 - + def test_ssa_t1(self): '''Test Series.ssa() with var truncation @@ -798,7 +837,7 @@ def test_ssa_t3(self): ''' ts = gen_ts(model = 'colored_noise', nt=500, alpha=1.0) res = ts.ssa(trunc='kaiser') - + def test_ssa_t4(self): '''Test Series.ssa() on Allen&Smith dataset ''' @@ -825,16 +864,55 @@ def test_plot(self): x_plot = line.get_xdata() y_plot = line.get_ydata() - + + pyleo.closefig(fig) + +class TestUiSeriesStripes: + '''Test for Series.stripes() + + Series.stripes outputs a matplotlib figure and axis object, so we will compare 
the time axis + of the axis object to the time array.''' + + def test_stripes(self): + + ts = gen_normal() + + fig, ax = ts.stripes(ref_period=[61,90],x_offset=2) + + pyleo.closefig(fig) + + +class TestUiSeriesHistplot: + '''Test for Series.histplot()''' + + def test_histplot_t0(self, max_axis = 5): + ts = gen_normal() + + fig, ax = ts.histplot() + + line = ax.lines[0] + + x_plot = line.get_xdata() + y_plot = line.get_ydata() + + assert max(x_plot) < max_axis + + pyleo.closefig(fig) + + def test_histplot_t1(self, vertical = True): + ts = gen_normal() + + fig, ax = ts.histplot(vertical=vertical) + pyleo.closefig(fig) class TestUiSeriesDistplot: '''Test for Series.distplot()''' - def test_distplot_t0(self, max_axis = 5): + def test_histplot_t0(self, max_axis = 5): ts = gen_normal() - - fig, ax = ts.distplot() + + fig, ax = ts.histplot() line = ax.lines[0] @@ -842,15 +920,14 @@ def test_distplot_t0(self, max_axis = 5): y_plot = line.get_ydata() assert max(x_plot) < max_axis - + pyleo.closefig(fig) - def test_distplot_t1(self, vertical = True): + def test_histplot_t1(self, vertical = True): ts = gen_normal() - - fig, ax = ts.distplot(vertical=vertical) - + fig, ax = ts.histplot(vertical=vertical) + pyleo.closefig(fig) class TestUiSeriesFilter: @@ -866,7 +943,7 @@ def test_filter_t0(self, method): sig = sig1 + sig2 ts1 = pyleo.Series(time=t, value=sig1) ts = pyleo.Series(time=t, value=sig) - ts_lp = ts.filter(cutoff_freq=15, method=method) + ts_lp = ts.filter(cutoff_freq=15, method=method, keep_log=True) val_diff = ts_lp.value - ts1.value assert np.mean(val_diff**2) < 0.2 @@ -884,4 +961,52 @@ def test_filter_t1(self, method): ts_bp = ts.filter(cutoff_freq=[15, 25], method=method) val_diff = ts_bp.value - ts2.value assert np.mean(val_diff**2) < 0.1 - \ No newline at end of file + +class TestUISeriesConvertTimeUnit: + '''Tests for Series.convert_time_unit''' + + @pytest.mark.parametrize('keep_log',[False,True]) + def test_convert_time_unit_t0(self,keep_log): + ts = 
gen_ts(nt=550, alpha=1.0) + ts.time_unit = 'kyr BP' + ts_converted = ts.convert_time_unit('yr BP',keep_log) + np.testing.assert_allclose(ts.time*1000,ts_converted.time,atol=1) + + def test_convert_time_unit_t1(self): + ts = gen_ts(nt=550, alpha=1.0) + ts.time_unit = 'nonsense' + with pytest.raises(ValueError): + ts.convert_time_unit('yr BP') + +class TestUISeriesFillNA: + '''Tests for Series.fill_na''' + + @pytest.mark.parametrize('timespan,nt,ts_dt,dt', [(None,500,8,5),(None,500,1,2),([100,400],500,4,2)]) + def test_fill_na_t0(self,timespan,nt,ts_dt,dt): + t = np.arange(0,nt,ts_dt) + v = np.ones(len(t)) + ts = pyleo.Series(t,v) + ts.fill_na(timespan=timespan,dt=dt) + + @pytest.mark.parametrize('keep_log', [True,False]) + def test_fill_na_t1(self,keep_log): + t = np.arange(0,500,10) + v = np.ones(len(t)) + ts = pyleo.Series(t,v) + ts.fill_na(dt=5,keep_log=keep_log) + +class TestUISeriesSort: + '''Tests for Series.sort''' + + @pytest.mark.parametrize('keep_log',[True,False]) + def test_sort_t0(self,keep_log): + ts = gen_ts(nt=500,alpha=1.0) + ts = ts.sort() + np.all(np.diff(ts.time) >= 0) + + def test_sort_t1(self): + t = np.arange(500,0,-1) + v = np.ones(len(t)) + ts = pyleo.Series(t,v) + ts.sort() + assert np.all(np.diff(ts.time) >= 0) diff --git a/pyleoclim/utils/causality.py b/pyleoclim/utils/causality.py index 683baa04..182ce309 100644 --- a/pyleoclim/utils/causality.py +++ b/pyleoclim/utils/causality.py @@ -26,7 +26,7 @@ def granger_causality(y1, y2, maxlag=1,addconst=True,verbose=True): All four tests give similar results. params_ftest and ssr_ftest are equivalent based on F test which is identical to lmtest:grangertest in R. 
- Wrapper for the functions described in statsmodel (https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.grangercausalitytests.html) + Wrapper for the functions described in statsmodels (https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.grangercausalitytests.html) Parameters ---------- @@ -51,13 +51,13 @@ def granger_causality(y1, y2, maxlag=1,addconst=True,verbose=True): Notes ----- - The null hypothesis for Granger causality tests is that the time series in the second column, x2, does NOT Granger cause the time series in the first column, x1. Grange causality means that past values of x2 have a statistically significant effect on the current value of x1, taking past values of x1 into account as regressors. We reject the null hypothesis that x2 does not Granger cause x1 if the pvalues are below a desired size of the test. + The null hypothesis for Granger causality tests is that y2, does NOT Granger cause y1. Granger causality means that past values of y2 have a statistically significant effect on the current value of y1, taking past values of y1 into account as regressors. We reject the null hypothesis that y2 does not Granger cause y1 if the p-values are below a desired threshold (e.g. 0.05). The null hypothesis for all four test is that the coefficients corresponding to past values of the second time series are zero. 
- ‘params_ftest’, ‘ssr_ftest’ are based on F distribution + ‘params_ftest’, ‘ssr_ftest’ are based on the F distribution - ‘ssr_chi2test’, ‘lrtest’ are based on chi-square distribution + ‘ssr_chi2test’, ‘lrtest’ are based on the chi-square distribution See also -------- @@ -503,7 +503,7 @@ def signif_isospec(y1, y2, method, 'tau21_noise_qs': tau21_noise_qs, 'T21_noise_qs': T21_noise_qs, } - #TODO add Granger + #TODO Recode with Surrogate class else: raise KeyError(f'{method} is not a valid method') diff --git a/pyleoclim/utils/correlation.py b/pyleoclim/utils/correlation.py index b1afd7b8..4f1f50db 100644 --- a/pyleoclim/utils/correlation.py +++ b/pyleoclim/utils/correlation.py @@ -479,7 +479,7 @@ def corr_isospec(y1, y2, alpha=0.05, nsim=1000): pyleoclim.utils.correlation.fdr : Determine significance based on the false discovery rate References - --------- + ---------- - Ebisuzaki, W, 1997: A method to estimate the statistical significance of a correlation when the data are serially correlated. diff --git a/pyleoclim/utils/mapping.py b/pyleoclim/utils/mapping.py index 00f218ba..5e5665da 100644 --- a/pyleoclim/utils/mapping.py +++ b/pyleoclim/utils/mapping.py @@ -442,6 +442,7 @@ def compute_dist(lat_r, lon_r, lat_c, lon_c): Returns ------- + dist: list A list of distances in km. 
""" @@ -459,15 +460,18 @@ def compute_dist(lat_r, lon_r, lat_c, lon_c): def within_distance(distance, radius): """ Returns the index of the records that are within a certain distance - Parameters: - ----------- + Parameters + ---------- + distance: list A list containing the distance + radius: float The radius to be considered Returns ------- + idx: list a list of index """ diff --git a/pyleoclim/utils/plotting.py b/pyleoclim/utils/plotting.py index 8198e671..a98965dc 100644 --- a/pyleoclim/utils/plotting.py +++ b/pyleoclim/utils/plotting.py @@ -10,6 +10,10 @@ import matplotlib.pyplot as plt import pathlib import matplotlib as mpl +import numpy as np +from matplotlib.patches import Rectangle +from matplotlib.collections import PatchCollection +from matplotlib.colors import ListedColormap def scatter_xy(x,y,c=None, figsize=None, xlabel=None, ylabel=None, title=None, @@ -40,7 +44,7 @@ def scatter_xy(x,y,c=None, figsize=None, xlabel=None, ylabel=None, title=None, Limits for the y-axis. The default is None. savefig_settings : dict, optional the dictionary of arguments for plt.savefig(); some notes below: - - "path" must be specified; it can be any existed or non-existed path, + - "path" must be specified; it can be any existing or non-existing path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} The default is None. 
@@ -135,7 +139,7 @@ def plot_scatter_xy(x1,y1,x2,y2, figsize=None, xlabel=None, the keyword arguments for ax.plot() savefig_settings : dict the dictionary of arguments for plt.savefig(); some notes below: - - "path" must be specified; it can be any existed or non-existed path, + - "path" must be specified; it can be any existing or non-existing path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} @@ -192,7 +196,7 @@ def plot_scatter_xy(x1,y1,x2,y2, figsize=None, xlabel=None, def plot_xy(x, y, figsize=None, xlabel=None, ylabel=None, title=None, xlim=None, ylim=None,savefig_settings=None, ax=None, - legend=True, plot_kwargs=None, lgd_kwargs=None, + legend=True, plot_kwargs=None, lgd_kwargs=None, invert_xaxis=False, invert_yaxis=False): ''' Plot a timeseries @@ -228,7 +232,7 @@ def plot_xy(x, y, figsize=None, xlabel=None, ylabel=None, title=None, (going to be deprecated) savefig_settings : dict the dictionary of arguments for plt.savefig(); some notes below: - - "path" must be specified; it can be any existed or non-existed path, + - "path" must be specified; it can be any existing or non-existing path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} invert_xaxis : bool, optional @@ -290,7 +294,135 @@ def plot_xy(x, y, figsize=None, xlabel=None, ylabel=None, title=None, return fig, ax else: return ax + +def stripes_xy(x, y, ref_period, thickness = 1.0, LIM = 0.75, figsize=None, xlabel=None, + ylabel=None, title=None, xlim=None, savefig_settings=None, ax=None, + x_offset = 0.05, label_size = None, show_xaxis = False, + invert_xaxis=False, top_label = None, bottom_label = None, label_color = None): + ''' + Represent y as an Ed Hawkins "warming stripes" pattern, as a function of x + + Credit: https://matplotlib.org/matplotblog/posts/warming-stripes/ + + Parameters + ---------- + x : 
array + Independent variable + y : array + Dependent variable + ref_period : 2-tuple or 2-vector + indices of the reference period, in the form "(first, last)" + thickness : float, optional + vertical thickness of the stripe. The default is 1.0 + LIM : float, optional + size of the y-value range (default: 0.75) + figsize : list + a list of two integers indicating the figure size + top_label : str + the "title" label for the stripe + bottom_label : str + the "ylabel" explaining which variable is being plotted + label_size : int + size of the text in labels (in points). Default is the Matplotlib 'axes.labelsize' rcParams + xlim : list + set the limits of the x axis + x_offset : float + value controlling the horizontal offset between stripes and labels (default = 0.05) + show_xaxis : bool + flag indicating whether or not the x-axis should be shown (default = False) + ax : pyplot.axis + the pyplot.axis object + savefig_settings : dict + the dictionary of arguments for plt.savefig(); some notes below: + - "path" must be specified; it can be any existing or non-existing path, + with or without a suffix; if the suffix is not given in "path", it will follow "format" + - "format" can be one of {"pdf", "eps", "png", "ps"} + invert_xaxis : bool, optional + if True, the x-axis of the plot will be inverted + + Returns + ------- + ax, or fig, ax (if no axes were provided) + ''' + # handle dict defaults + savefig_settings = {} if savefig_settings is None else savefig_settings.copy() + + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + + if label_size is None: + label_size = mpl.rcParams['axes.labelsize'] + + if thickness is None: + thickness = 5*label_size + + ax.get_yaxis().set_visible(False) # remove parasitic lines and labels + ax.get_xaxis().set_visible(show_xaxis) # remove parasitic lines and labels + ax.spines[:].set_visible(False) + + dx = np.diff(x).mean() + xmin = x.min() + xmax = x.max() + dx # inclusive + + # Reference period for the center of the color
scale + reference = y[ref_period[0]:ref_period[1]].mean() + + # colormap: the 8 more saturated colors from the 9 blues / 9 reds + cmap = ListedColormap([ + '#08306b', '#08519c', '#2171b5', '#4292c6', + '#6baed6', '#9ecae1', '#c6dbef', '#deebf7', + '#fee0d2', '#fcbba1', '#fc9272', '#fb6a4a', + '#ef3b2c', '#cb181d', '#a50f15', '#67000d', + ]) + + col = PatchCollection([ + Rectangle((yl, 0), 1, 1) + for yl in range(int(xmin), int(xmax)) + ]) + + # set data, colormap and color limits + col.set_array(y) + col.set_cmap(cmap) + col.set_clim(reference - LIM, reference + LIM) + ax.add_collection(col) + # adjust axes + ax.set_ylim(0, thickness) + ax.set_xlim(xmin, xmax); + + # add label to the right + #offset = y_offsets[column] / 72 + #trans = mtransforms.ScaledTranslation(0, offset, fig.dpi_scale_trans) + #trans = ax.transData #+ trans + + ypos = 0.4*thickness + ax.text(xmax+dx+x_offset, 0.6*thickness, top_label, color=label_color, + fontsize=label_size, fontweight = 'bold') + ax.text(xmax+dx+x_offset, 0.2*thickness, bottom_label, color=label_color, + fontsize=label_size) + + if xlabel is not None: + ax.set_xlabel(xlabel) + + if ylabel is not None: + ax.set_ylabel(ylabel) + + if title is not None: + ax.set_title(title) + + if xlim is not None: + ax.set_xlim(xlim) + + if invert_xaxis: + ax.invert_xaxis() + + if 'fig' in locals(): + fig.tight_layout() + if 'path' in savefig_settings: + savefig(fig, settings=savefig_settings) + return fig, ax + else: + return ax def closefig(fig=None): '''Close the figure @@ -320,7 +452,7 @@ def savefig(fig, path=None, dpi=300, settings={}, verbose=True): settings : dict the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified in settings if not assigned with the keyword argument; - it can be any existed or non-existed path, with or without a suffix; + it can be any existing or non-existing path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of 
{"pdf", "eps", "png", "ps"} verbose : bool, {True,False} @@ -376,12 +508,12 @@ def set_style(style='journal', font_scale=1.0, dpi=300): ''' font_dict = { - 'font.size': 12, - 'axes.labelsize': 12, - 'axes.titlesize': 14, - 'xtick.labelsize': 11, - 'ytick.labelsize': 11, - 'legend.fontsize': 11, + 'font.size': 10, + 'axes.labelsize': 11, + 'axes.titlesize': 12, + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'legend.fontsize': 9, } style_dict = {} diff --git a/pyleoclim/utils/spectral.py b/pyleoclim/utils/spectral.py index b35d778a..f4336d73 100644 --- a/pyleoclim/utils/spectral.py +++ b/pyleoclim/utils/spectral.py @@ -1091,7 +1091,7 @@ def psd_fBM(freq, ts, H): Hurst exponent, should be in (0, 1) Returns - -------- + ------- psd : array power spectral density diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 4bdfceef..ae24f5f3 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -144,8 +144,6 @@ def ar1_sim(y, p, t=None): # simulate AR(1) model for each column for i in np.arange(p): - #ysim[:, i] = sm.tsa.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=50, sigma=sig_n) # old statsmodels syntax - #ysim[:, i] = sm.tsa.ArmaProcess(ar, ma).generate_sample(nsample=n, scale=sig_n, burnin=50) # statsmodels v0.11.1-? ysim[:, i] = arma_generate_sample(ar, ma, nsample=n, scale=sig_n, burnin=50) # statsmodels v0.12+ else: # tau_est = ar1_fit(y, t=t, detrend=detrend, params=params) diff --git a/pyleoclim/utils/tsutils.py b/pyleoclim/utils/tsutils.py index fde9ae8d..9826f869 100644 --- a/pyleoclim/utils/tsutils.py +++ b/pyleoclim/utils/tsutils.py @@ -219,7 +219,7 @@ def gkernel(t,y, h = 3.0, step=None,start=None,stop=None, step_style = 'max'): Rehfeld, K., Marwan, N., Heitzig, J., and Kurths, J.: Comparison of correlation analysis techniques for irregularly sampled time series, Nonlin. Processes Geophys., - 18, 389–404, https://doi.org/10.5194/npg-18-389-2011, 2011. + 18, 389–404, doi:10.5194/npg-18-389-2011, 2011. 
See also -------- @@ -268,10 +268,11 @@ def gkernel(t,y, h = 3.0, step=None,start=None,stop=None, step_style = 'max'): def increments(x,step_style='median'): - ''' Establishes the increments of a numerical array: start, stop, and representative step. + '''Establishes the increments of a numerical array: start, stop, and representative step. Parameters ---------- + x : array step_style : str @@ -284,6 +285,7 @@ def increments(x,step_style='median'): Returns ------- + start : float min(x) stop : float @@ -354,7 +356,7 @@ def interp(x,y, interp_type='linear', step=None,start=None,stop=None, step_style See Also -------- - pyleoclim.utils.tsutils.increment : Establishes the increments of a numerical array + pyleoclim.utils.tsutils.increments : Establishes the increments of a numerical array pyleoclim.utils.tsutils.bin : Bin the values @@ -708,7 +710,10 @@ def detrend(y, x=None, method="emd", n=1, sg_kwargs=None): ------- ys : array - The detrended timeseries. + The detrended version of y. + + trend : array + The removed trend. 
Only non-empty for EMD and Savitzy-Golay methods, since SciPy detrending does not retain the trends See also -------- @@ -725,8 +730,10 @@ def detrend(y, x=None, method="emd", n=1, sg_kwargs=None): if method == "linear": ys = signal.detrend(y,type='linear') + trend = y - ys elif method == 'constant': ys = signal.detrend(y,type='constant') + trend = y - ys elif method == "savitzky-golay": # Check that the timeseries is uneven and interpolate if needed if x is None: @@ -742,21 +749,20 @@ def detrend(y, x=None, method="emd", n=1, sg_kwargs=None): # Now filter y_filt = savitzky_golay(y_interp,**sg_kwargs) # Put it all back on the original x axis - y_filt_x = np.interp(x,x_interp,y_filt) - ys = y-y_filt_x + trend = np.interp(x,x_interp,y_filt) + ys = y - trend elif method == "emd": imfs = EMD(y).decompose() if np.shape(imfs)[0] == 1: trend = np.zeros(np.size(y)) else: - # trend = imfs[-1] trend = np.sum(imfs[-n:], axis=0) # remove the n smoothest modes ys = y - trend else: raise KeyError('Not a valid detrending method') - return ys + return ys, trend def calculate_distances(ys, n_neighbors=None, NN_kwargs=None): """ @@ -1041,8 +1047,8 @@ def eff_sample_size(y, detrend_flag=False): neff : float The effective sample size - Reference - --------- + References + ---------- Thiébaux HJ and Zwiers FW, 1984: The interpretation and estimation of effective sample sizes. Journal of Climate and Applied Meteorology 23: 800–811. 
diff --git a/pyleoclim/utils/wavelet.py b/pyleoclim/utils/wavelet.py index d24b10a0..89b299dd 100644 --- a/pyleoclim/utils/wavelet.py +++ b/pyleoclim/utils/wavelet.py @@ -56,7 +56,7 @@ class AliasFilter(object): ''' def alias_filter(self, freq, pwr, fs, fc, f_limit, avgs): - ''' anti_alias filter + ''' The anti-aliasing filter Parameters ---------- @@ -2388,8 +2388,8 @@ def cwt(ys,ts,freq=None,freq_method='log',freq_kwargs={}, scale = None, detrend= wave, coi = tc_wavelet(ys, dt, scale, mother, param, pad) amplitude=np.abs(wave) - Results = collections.namedtuple('Results', ['amplitude', 'coi', 'freq', 'time', 'scale', 'coeff', 'mother','param']) - res = Results(amplitude=amplitude.T, coi=coi, freq=freq, time=ts, scale=scale, coeff=wave, mother=mother,param=param) + Results = collections.namedtuple('Results', ['amplitude', 'coi', 'freq', 'time', 'scale', 'coeff', 'mother','param','gaussianize','standardize']) + res = Results(amplitude=amplitude.T, coi=coi, freq=freq, time=ts, scale=scale, coeff=wave, mother=mother,param=param, gaussianize=gaussianize,standardize=standardize) return res diff --git a/setup.py b/setup.py index 52833316..63cda1a3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages -version = '0.9.1' +version = '0.10.0' # Read the readme file contents into variable def read(fname):